diff options
author | Zack Weinberg <zackw@panix.com> | 2017-06-08 15:39:03 -0400 |
---|---|---|
committer | Zack Weinberg <zackw@panix.com> | 2017-06-08 15:39:03 -0400 |
commit | 5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 (patch) | |
tree | 4470480d904b65cf14ca524f96f79eca818c3eaf /REORG.TODO/sysdeps/powerpc | |
parent | 199fc19d3aaaf57944ef036e15904febe877fc93 (diff) | |
download | glibc-zack/build-layout-experiment.tar.gz glibc-zack/build-layout-experiment.tar.xz glibc-zack/build-layout-experiment.zip |
Prepare for radical source tree reorganization. zack/build-layout-experiment
All top-level files and directories are moved into a temporary storage directory, REORG.TODO, except for files that will certainly still exist in their current form at top level when we're done (COPYING, COPYING.LIB, LICENSES, NEWS, README), all old ChangeLog files (which are moved to the new directory OldChangeLogs, instead), and the generated file INSTALL (which is just deleted; in the new order, there will be no generated files checked into version control).
Diffstat (limited to 'REORG.TODO/sysdeps/powerpc')
983 files changed, 77241 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/powerpc/Implies b/REORG.TODO/sysdeps/powerpc/Implies new file mode 100644 index 0000000000..78dba9510c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/Implies @@ -0,0 +1,5 @@ +# On PowerPC we use the IBM extended long double format. +ieee754/ldbl-128ibm +ieee754/ldbl-opt +ieee754/dbl-64 +ieee754/flt-32 diff --git a/REORG.TODO/sysdeps/powerpc/Makefile b/REORG.TODO/sysdeps/powerpc/Makefile new file mode 100644 index 0000000000..e03a202c65 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/Makefile @@ -0,0 +1,42 @@ +ifeq ($(subdir),string) +CFLAGS-memcmp.c += -Wno-uninitialized +endif + +ifeq ($(subdir),elf) +# extra shared linker files to link into dl-allobjs.so and libc +sysdep-dl-routines += dl-machine hwcapinfo +sysdep_routines += dl-machine hwcapinfo +# extra shared linker files to link only into dl-allobjs.so +sysdep-rtld-routines += dl-machine hwcapinfo +# Don't optimize GD tls sequence to LE. +LDFLAGS-tst-tlsopt-powerpc += -Wl,--no-tls-optimize +tests += tst-tlsopt-powerpc +endif + +ifeq ($(subdir),setjmp) +ifeq (yes,$(build-shared)) +sysdep_routines += novmx-longjmp novmx-sigjmp +endif +endif + +ifeq ($(subdir),csu) +# get offset to rtld_global._dl_hwcap and rtld_global._dl_hwcap2 +gen-as-const-headers += rtld-global-offsets.sym +# get offset to __locale_struct.__ctype_tolower +gen-as-const-headers += locale-defines.sym +endif + +ifeq ($(subdir),nptl) +tests-internal += test-get_hwcap test-get_hwcap-static +tests-static += test-get_hwcap-static +endif + +ifeq ($(subdir),misc) +sysdep_headers += sys/platform/ppc.h +tests += test-gettimebase +tests += tst-set_ppr +endif + +ifneq (,$(filter %le,$(config-machine))) +abilist-pattern = %-le.abilist +endif diff --git a/REORG.TODO/sysdeps/powerpc/Versions b/REORG.TODO/sysdeps/powerpc/Versions new file mode 100644 index 0000000000..95849668f2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/Versions @@ -0,0 +1,31 @@ +libm { + GLIBC_2.1 { + # symbols used in macros from sysdeps/powerpc/bits/fenv.h + __fe_dfl_env; __fe_enabled_env; __fe_nonieee_env; __fe_nomask_env; + } + GLIBC_2.25 { + __fe_dfl_mode; + } +} + +libc { + GLIBC_2.3.4 { + _longjmp; __sigsetjmp; _setjmp; + longjmp; setjmp; + } + GLIBC_PRIVATE { + __novmx__libc_longjmp; __novmx__libc_siglongjmp; + __vmx__libc_longjmp; __vmx__libc_siglongjmp; + } +} + +ld { + GLIBC_2.22 { + __tls_get_addr_opt; + } + GLIBC_2.23 { + # Symbol used to version control when the ABI started to specify that HWCAP + # and AT_PLATFORM data should be stored into the TCB. + __parse_hwcap_and_convert_at_platform; + } +} diff --git a/REORG.TODO/sysdeps/powerpc/abort-instr.h b/REORG.TODO/sysdeps/powerpc/abort-instr.h new file mode 100644 index 0000000000..43746e65ba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/abort-instr.h @@ -0,0 +1,2 @@ +/* An op-code of 0 is guaranteed to be illegal. */ +#define ABORT_INSTRUCTION asm (".long 0") diff --git a/REORG.TODO/sysdeps/powerpc/atomic-machine.h b/REORG.TODO/sysdeps/powerpc/atomic-machine.h new file mode 100644 index 0000000000..0a58203a10 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/atomic-machine.h @@ -0,0 +1,339 @@ +/* Atomic operations. PowerPC Common version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + * Never include sysdeps/powerpc/atomic-machine.h directly. + * Alway use include/atomic.h which will include either + * sysdeps/powerpc/powerpc32/atomic-machine.h + * or + * sysdeps/powerpc/powerpc64/atomic-machine.h + * as appropriate and which in turn include this file. + */ + +#include <stdint.h> + +typedef int32_t atomic32_t; +typedef uint32_t uatomic32_t; +typedef int_fast32_t atomic_fast32_t; +typedef uint_fast32_t uatomic_fast32_t; + +typedef int64_t atomic64_t; +typedef uint64_t uatomic64_t; +typedef int_fast64_t atomic_fast64_t; +typedef uint_fast64_t uatomic_fast64_t; + +typedef intptr_t atomicptr_t; +typedef uintptr_t uatomicptr_t; +typedef intmax_t atomic_max_t; +typedef uintmax_t uatomic_max_t; + +/* + * Powerpc does not have byte and halfword forms of load and reserve and + * store conditional. So for powerpc we stub out the 8- and 16-bit forms. + */ +#define __arch_compare_and_exchange_bool_8_acq(mem, newval, oldval) \ + (abort (), 0) + +#define __arch_compare_and_exchange_bool_16_acq(mem, newval, oldval) \ + (abort (), 0) + +#ifdef UP +# define __ARCH_ACQ_INSTR "" +# define __ARCH_REL_INSTR "" +#else +# define __ARCH_ACQ_INSTR "isync" +# ifndef __ARCH_REL_INSTR +# define __ARCH_REL_INSTR "sync" +# endif +#endif + +#ifndef MUTEX_HINT_ACQ +# define MUTEX_HINT_ACQ +#endif +#ifndef MUTEX_HINT_REL +# define MUTEX_HINT_REL +#endif + +#define atomic_full_barrier() __asm ("sync" ::: "memory") + +#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __tmp; \ + __typeof (mem) __memp = (mem); \ + __asm __volatile ( \ + "1: lwarx %0,0,%1" MUTEX_HINT_ACQ "\n" \ + " cmpw %0,%2\n" \ + " bne 2f\n" \ + " stwcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp) \ + : "b" (__memp), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp; \ + }) + +#define __arch_compare_and_exchange_val_32_rel(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __tmp; \ + __typeof (mem) __memp = (mem); \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: lwarx %0,0,%1" MUTEX_HINT_REL "\n" \ + " cmpw %0,%2\n" \ + " bne 2f\n" \ + " stwcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " \ + : "=&r" (__tmp) \ + : "b" (__memp), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp; \ + }) + +#define __arch_atomic_exchange_32_acq(mem, value) \ + ({ \ + __typeof (*mem) __val; \ + __asm __volatile ( \ + "1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \ + " stwcx. %3,0,%2\n" \ + " bne- 1b\n" \ + " " __ARCH_ACQ_INSTR \ + : "=&r" (__val), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_32_rel(mem, value) \ + ({ \ + __typeof (*mem) __val; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: lwarx %0,0,%2" MUTEX_HINT_REL "\n" \ + " stwcx. %3,0,%2\n" \ + " bne- 1b" \ + : "=&r" (__val), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_32(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile ("1: lwarx %0,0,%3\n" \ + " add %1,%0,%4\n" \ + " stwcx. %1,0,%3\n" \ + " bne- 1b" \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_32_acq(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile ("1: lwarx %0,0,%3" MUTEX_HINT_ACQ "\n" \ + " add %1,%0,%4\n" \ + " stwcx. %1,0,%3\n" \ + " bne- 1b\n" \ + __ARCH_ACQ_INSTR \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_32_rel(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: lwarx %0,0,%3" MUTEX_HINT_REL "\n" \ + " add %1,%0,%4\n" \ + " stwcx. %1,0,%3\n" \ + " bne- 1b" \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_increment_val_32(mem) \ + ({ \ + __typeof (*(mem)) __val; \ + __asm __volatile ("1: lwarx %0,0,%2\n" \ + " addi %0,%0,1\n" \ + " stwcx. %0,0,%2\n" \ + " bne- 1b" \ + : "=&b" (__val), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_decrement_val_32(mem) \ + ({ \ + __typeof (*(mem)) __val; \ + __asm __volatile ("1: lwarx %0,0,%2\n" \ + " subi %0,%0,1\n" \ + " stwcx. %0,0,%2\n" \ + " bne- 1b" \ + : "=&b" (__val), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_decrement_if_positive_32(mem) \ + ({ int __val, __tmp; \ + __asm __volatile ("1: lwarx %0,0,%3\n" \ + " cmpwi 0,%0,0\n" \ + " addi %1,%0,-1\n" \ + " ble 2f\n" \ + " stwcx. %1,0,%3\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_compare_and_exchange_val_32_acq(mem, newval, oldval); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_compare_and_exchange_val_64_acq(mem, newval, oldval); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_compare_and_exchange_val_rel(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_compare_and_exchange_val_32_rel(mem, newval, oldval); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_compare_and_exchange_val_64_rel(mem, newval, oldval); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_exchange_acq(mem, value) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_atomic_exchange_32_acq (mem, value); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_atomic_exchange_64_acq (mem, value); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_exchange_rel(mem, value) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_atomic_exchange_32_rel (mem, value); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_atomic_exchange_64_rel (mem, value); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_exchange_and_add(mem, value) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_atomic_exchange_and_add_32 (mem, value); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_atomic_exchange_and_add_64 (mem, value); \ + else \ + abort (); \ + __result; \ + }) +#define atomic_exchange_and_add_acq(mem, value) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_atomic_exchange_and_add_32_acq (mem, value); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_atomic_exchange_and_add_64_acq (mem, value); \ + else \ + abort (); \ + __result; \ + }) +#define atomic_exchange_and_add_rel(mem, value) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_atomic_exchange_and_add_32_rel (mem, value); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_atomic_exchange_and_add_64_rel (mem, value); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_increment_val(mem) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*(mem)) == 4) \ + __result = __arch_atomic_increment_val_32 (mem); \ + else if (sizeof (*(mem)) == 8) \ + __result = __arch_atomic_increment_val_64 (mem); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_increment(mem) ({ atomic_increment_val (mem); (void) 0; }) + +#define atomic_decrement_val(mem) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*(mem)) == 4) \ + __result = __arch_atomic_decrement_val_32 (mem); \ + else if (sizeof (*(mem)) == 8) \ + __result = __arch_atomic_decrement_val_64 (mem); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_decrement(mem) ({ atomic_decrement_val (mem); (void) 0; }) + + +/* Decrement *MEM if it is > 0, and return the old value. */ +#define atomic_decrement_if_positive(mem) \ + ({ __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_atomic_decrement_if_positive_32 (mem); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_atomic_decrement_if_positive_64 (mem); \ + else \ + abort (); \ + __result; \ + }) diff --git a/REORG.TODO/sysdeps/powerpc/bits/endian.h b/REORG.TODO/sysdeps/powerpc/bits/endian.h new file mode 100644 index 0000000000..db150e9efc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/endian.h @@ -0,0 +1,36 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* PowerPC can be little or big endian. Hopefully gcc will know... */ + +#ifndef _ENDIAN_H +# error "Never use <bits/endian.h> directly; include <endian.h> instead." +#endif + +#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN +# if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN +# error Both BIG_ENDIAN and LITTLE_ENDIAN defined! +# endif +# define __BYTE_ORDER __BIG_ENDIAN +#else +# if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN +# define __BYTE_ORDER __LITTLE_ENDIAN +# else +# warning Cannot determine current byte order, assuming big-endian. +# define __BYTE_ORDER __BIG_ENDIAN +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/bits/fenv.h b/REORG.TODO/sysdeps/powerpc/bits/fenv.h new file mode 100644 index 0000000000..c279b484f5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/fenv.h @@ -0,0 +1,180 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FENV_H +# error "Never use <bits/fenv.h> directly; include <fenv.h> instead." +#endif + + +/* Define bits representing the exception. We use the bit positions of + the appropriate bits in the FPSCR... */ +enum + { + FE_INEXACT = +#define FE_INEXACT (1 << (31 - 6)) + FE_INEXACT, + FE_DIVBYZERO = +#define FE_DIVBYZERO (1 << (31 - 5)) + FE_DIVBYZERO, + FE_UNDERFLOW = +#define FE_UNDERFLOW (1 << (31 - 4)) + FE_UNDERFLOW, + FE_OVERFLOW = +#define FE_OVERFLOW (1 << (31 - 3)) + FE_OVERFLOW, + + /* ... except for FE_INVALID, for which we use bit 31. FE_INVALID + actually corresponds to bits 7 through 12 and 21 through 23 + in the FPSCR, but we can't use that because the current draft + says that it must be a power of 2. Instead we use bit 2 which + is the summary bit for all the FE_INVALID exceptions, which + kind of makes sense. */ + FE_INVALID = +#define FE_INVALID (1 << (31 - 2)) + FE_INVALID, + +#ifdef __USE_GNU + /* Breakdown of the FE_INVALID bits. Setting FE_INVALID on an + input to a routine is equivalent to setting all of these bits; + FE_INVALID will be set on output from a routine iff one of + these bits is set. Note, though, that you can't disable or + enable these exceptions individually. */ + + /* Operation with a sNaN. */ + FE_INVALID_SNAN = +# define FE_INVALID_SNAN (1 << (31 - 7)) + FE_INVALID_SNAN, + + /* Inf - Inf */ + FE_INVALID_ISI = +# define FE_INVALID_ISI (1 << (31 - 8)) + FE_INVALID_ISI, + + /* Inf / Inf */ + FE_INVALID_IDI = +# define FE_INVALID_IDI (1 << (31 - 9)) + FE_INVALID_IDI, + + /* 0 / 0 */ + FE_INVALID_ZDZ = +# define FE_INVALID_ZDZ (1 << (31 - 10)) + FE_INVALID_ZDZ, + + /* Inf * 0 */ + FE_INVALID_IMZ = +# define FE_INVALID_IMZ (1 << (31 - 11)) + FE_INVALID_IMZ, + + /* Comparison with a NaN. */ + FE_INVALID_COMPARE = +# define FE_INVALID_COMPARE (1 << (31 - 12)) + FE_INVALID_COMPARE, + + /* Invalid operation flag for software (not set by hardware). */ + /* Note that some chips don't have this implemented, presumably + because no-one expected anyone to write software for them %-). */ + FE_INVALID_SOFTWARE = +# define FE_INVALID_SOFTWARE (1 << (31 - 21)) + FE_INVALID_SOFTWARE, + + /* Square root of negative number (including -Inf). */ + /* Note that some chips don't have this implemented. */ + FE_INVALID_SQRT = +# define FE_INVALID_SQRT (1 << (31 - 22)) + FE_INVALID_SQRT, + + /* Conversion-to-integer of a NaN or a number too large or too small. */ + FE_INVALID_INTEGER_CONVERSION = +# define FE_INVALID_INTEGER_CONVERSION (1 << (31 - 23)) + FE_INVALID_INTEGER_CONVERSION + +# define FE_ALL_INVALID \ + (FE_INVALID_SNAN | FE_INVALID_ISI | FE_INVALID_IDI | FE_INVALID_ZDZ \ + | FE_INVALID_IMZ | FE_INVALID_COMPARE | FE_INVALID_SOFTWARE \ + | FE_INVALID_SQRT | FE_INVALID_INTEGER_CONVERSION) +#endif + }; + +#define FE_ALL_EXCEPT \ + (FE_INEXACT | FE_DIVBYZERO | FE_UNDERFLOW | FE_OVERFLOW | FE_INVALID) + +/* PowerPC chips support all of the four defined rounding modes. We + use the bit pattern in the FPSCR as the values for the + appropriate macros. */ +enum + { + FE_TONEAREST = +#define FE_TONEAREST 0 + FE_TONEAREST, + FE_TOWARDZERO = +#define FE_TOWARDZERO 1 + FE_TOWARDZERO, + FE_UPWARD = +#define FE_UPWARD 2 + FE_UPWARD, + FE_DOWNWARD = +#define FE_DOWNWARD 3 + FE_DOWNWARD + }; + +/* Type representing exception flags. */ +typedef unsigned int fexcept_t; + +/* Type representing floating-point environment. We leave it as 'double' + for efficiency reasons (rather than writing it to a 32-bit integer). */ +typedef double fenv_t; + +/* If the default argument is used we use this value. */ +extern const fenv_t __fe_dfl_env; +#define FE_DFL_ENV (&__fe_dfl_env) + +#ifdef __USE_GNU +/* Floating-point environment where all exceptions are enabled. Note that + this is not sufficient to give you SIGFPE. */ +extern const fenv_t __fe_enabled_env; +# define FE_ENABLED_ENV (&__fe_enabled_env) + +/* Floating-point environment with (processor-dependent) non-IEEE floating + point. */ +extern const fenv_t __fe_nonieee_env; +# define FE_NONIEEE_ENV (&__fe_nonieee_env) + +/* Floating-point environment with all exceptions enabled. Note that + just evaluating this value does not change the processor exception mode. + Passing this mask to fesetenv will result in a prctl syscall to change + the MSR FE0/FE1 bits to "Precise Mode". On some processors this will + result in slower floating point execution. This will last until an + fenv or exception mask is installed that disables all FP exceptions. */ +# define FE_NOMASK_ENV FE_ENABLED_ENV + +/* Floating-point environment with all exceptions disabled. Note that + just evaluating this value does not change the processor exception mode. + Passing this mask to fesetenv will result in a prctl syscall to change + the MSR FE0/FE1 bits to "Ignore Exceptions Mode". On most processors + this allows the fastest possible floating point execution.*/ +# define FE_MASK_ENV FE_DFL_ENV + +#endif + +#if __GLIBC_USE (IEC_60559_BFP_EXT) +/* Type representing floating-point control modes. */ +typedef double femode_t; + +/* Default floating-point control modes. */ +extern const femode_t __fe_dfl_mode; +# define FE_DFL_MODE (&__fe_dfl_mode) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/bits/fenvinline.h b/REORG.TODO/sysdeps/powerpc/bits/fenvinline.h new file mode 100644 index 0000000000..4110bdfbbf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/fenvinline.h @@ -0,0 +1,79 @@ +/* Inline floating-point environment handling functions for powerpc. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__ + +/* Inline definition for fegetround. */ +# define __fegetround() \ + (__extension__ ({ int __fegetround_result; \ + __asm__ __volatile__ \ + ("mcrfs 7,7 ; mfcr %0" \ + : "=r"(__fegetround_result) : : "cr7"); \ + __fegetround_result & 3; })) +# define fegetround() __fegetround () + +# ifndef __NO_MATH_INLINES +/* The weird 'i#*X' constraints on the following suppress a gcc + warning when __excepts is not a constant. Otherwise, they mean the + same as just plain 'i'. */ + +# if __GNUC_PREREQ(3, 4) + +/* Inline definition for feraiseexcept. */ +# define feraiseexcept(__excepts) \ + (__extension__ ({ \ + int __e = __excepts; \ + int __ret; \ + if (__builtin_constant_p (__e) \ + && (__e & (__e - 1)) == 0 \ + && __e != FE_INVALID) \ + { \ + if (__e != 0) \ + __asm__ __volatile__ ("mtfsb1 %0" \ + : : "i#*X" (__builtin_clz (__e))); \ + __ret = 0; \ + } \ + else \ + __ret = feraiseexcept (__e); \ + __ret; \ + })) + +/* Inline definition for feclearexcept. */ +# define feclearexcept(__excepts) \ + (__extension__ ({ \ + int __e = __excepts; \ + int __ret; \ + if (__builtin_constant_p (__e) \ + && (__e & (__e - 1)) == 0 \ + && __e != FE_INVALID) \ + { \ + if (__e != 0) \ + __asm__ __volatile__ ("mtfsb0 %0" \ + : : "i#*X" (__builtin_clz (__e))); \ + __ret = 0; \ + } \ + else \ + __ret = feclearexcept (__e); \ + __ret; \ + })) + +# endif /* __GNUC_PREREQ(3, 4). */ + +# endif /* !__NO_MATH_INLINES. */ + +#endif /* __GNUC__ && !_SOFT_FLOAT && !__NO_FPRS__ */ diff --git a/REORG.TODO/sysdeps/powerpc/bits/fp-fast.h b/REORG.TODO/sysdeps/powerpc/bits/fp-fast.h new file mode 100644 index 0000000000..9faf1b7c51 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/fp-fast.h @@ -0,0 +1,39 @@ +/* Define FP_FAST_* macros. PowerPC version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/fp-fast.h> directly; include <math.h> instead." +#endif + +#ifdef __USE_ISOC99 + +/* The GCC 4.6 compiler will define __FP_FAST_FMA{,F,L} if the fma{,f,l} + builtins are supported. */ +# if (!defined _SOFT_FLOAT && !defined __NO_FPRS__) || defined __FP_FAST_FMA +# define FP_FAST_FMA 1 +# endif + +# if (!defined _SOFT_FLOAT && !defined __NO_FPRS__) || defined __FP_FAST_FMAF +# define FP_FAST_FMAF 1 +# endif + +# ifdef __FP_FAST_FMAL +# define FP_FAST_FMAL 1 +# endif + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/bits/hwcap.h b/REORG.TODO/sysdeps/powerpc/bits/hwcap.h new file mode 100644 index 0000000000..c9daeedfde --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/hwcap.h @@ -0,0 +1,71 @@ +/* Defines for bits in AT_HWCAP and AT_HWCAP2. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined(_SYS_AUXV_H) && !defined(_SYSDEPS_SYSDEP_H) +# error "Never include <bits/hwcap.h> directly; use <sys/auxv.h> instead." +#endif + +/* The bit numbers must match those in the kernel's asm/cputable.h. */ + +/* Feature definitions in AT_HWCAP. */ +#define PPC_FEATURE_32 0x80000000 /* 32-bit mode. */ +#define PPC_FEATURE_64 0x40000000 /* 64-bit mode. */ +#define PPC_FEATURE_601_INSTR 0x20000000 /* 601 chip, Old POWER ISA. */ +#define PPC_FEATURE_HAS_ALTIVEC 0x10000000 /* SIMD/Vector Unit. */ +#define PPC_FEATURE_HAS_FPU 0x08000000 /* Floating Point Unit. */ +#define PPC_FEATURE_HAS_MMU 0x04000000 /* Memory Management Unit. */ +#define PPC_FEATURE_HAS_4xxMAC 0x02000000 /* 4xx Multiply Accumulator. */ +#define PPC_FEATURE_UNIFIED_CACHE 0x01000000 /* Unified I/D cache. */ +#define PPC_FEATURE_HAS_SPE 0x00800000 /* Signal Processing ext. */ +#define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000 /* SPE Float. */ +#define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000 /* SPE Double. */ +#define PPC_FEATURE_NO_TB 0x00100000 /* 601/403gx have no timebase */ +#define PPC_FEATURE_POWER4 0x00080000 /* POWER4 ISA 2.00 */ +#define PPC_FEATURE_POWER5 0x00040000 /* POWER5 ISA 2.02 */ +#define PPC_FEATURE_POWER5_PLUS 0x00020000 /* POWER5+ ISA 2.03 */ +#define PPC_FEATURE_CELL_BE 0x00010000 /* CELL Broadband Engine */ +#define PPC_FEATURE_BOOKE 0x00008000 /* ISA Category Embedded */ +#define PPC_FEATURE_SMT 0x00004000 /* Simultaneous + Multi-Threading */ +#define PPC_FEATURE_ICACHE_SNOOP 0x00002000 +#define PPC_FEATURE_ARCH_2_05 0x00001000 /* ISA 2.05 */ +#define PPC_FEATURE_PA6T 0x00000800 /* PA Semi 6T Core */ +#define PPC_FEATURE_HAS_DFP 0x00000400 /* Decimal FP Unit */ +#define PPC_FEATURE_POWER6_EXT 0x00000200 /* P6 + mffgpr/mftgpr */ +#define PPC_FEATURE_ARCH_2_06 0x00000100 /* ISA 2.06 */ +#define PPC_FEATURE_HAS_VSX 0x00000080 /* P7 Vector Extension. */ +#define PPC_FEATURE_PSERIES_PERFMON_COMPAT 0x00000040 +#define PPC_FEATURE_TRUE_LE 0x00000002 +#define PPC_FEATURE_PPC_LE 0x00000001 + +/* Feature definitions in AT_HWCAP2. */ +#define PPC_FEATURE2_ARCH_2_07 0x80000000 /* ISA 2.07 */ +#define PPC_FEATURE2_HAS_HTM 0x40000000 /* Hardware Transactional + Memory */ +#define PPC_FEATURE2_HAS_DSCR 0x20000000 /* Data Stream Control + Register */ +#define PPC_FEATURE2_HAS_EBB 0x10000000 /* Event Base Branching */ +#define PPC_FEATURE2_HAS_ISEL 0x08000000 /* Integer Select */ +#define PPC_FEATURE2_HAS_TAR 0x04000000 /* Target Address Register */ +#define PPC_FEATURE2_HAS_VEC_CRYPTO 0x02000000 /* Target supports vector + instruction. */ +#define PPC_FEATURE2_HTM_NOSC 0x01000000 /* Kernel aborts transaction + when a syscall is made. */ +#define PPC_FEATURE2_ARCH_3_00 0x00800000 /* ISA 3.0 */ +#define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float + 128-bit */ diff --git a/REORG.TODO/sysdeps/powerpc/bits/link.h b/REORG.TODO/sysdeps/powerpc/bits/link.h new file mode 100644 index 0000000000..1cab121a65 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/link.h @@ -0,0 +1,156 @@ +/* Machine-specific declarations for dynamic linker interface. PowerPC version + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _LINK_H +# error "Never include <bits/link.h> directly; use <link.h> instead." +#endif + + +#if __ELF_NATIVE_CLASS == 32 + +/* Registers for entry into PLT on PPC32. */ +typedef struct La_ppc32_regs +{ + uint32_t lr_reg[8]; + double lr_fp[8]; + uint32_t lr_vreg[12][4]; + uint32_t lr_r1; + uint32_t lr_lr; +} La_ppc32_regs; + +/* Return values for calls from PLT on PPC32. */ +typedef struct La_ppc32_retval +{ + uint32_t lrv_r3; + uint32_t lrv_r4; + double lrv_fp[8]; + uint32_t lrv_v2[4]; +} La_ppc32_retval; + + +__BEGIN_DECLS + +extern Elf32_Addr la_ppc32_gnu_pltenter (Elf32_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_ppc32_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_ppc32_gnu_pltexit (Elf32_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_ppc32_regs *__inregs, + La_ppc32_retval *__outregs, + const char *__symname); + +__END_DECLS + +#elif __ELF_NATIVE_CLASS == 64 +# if _CALL_ELF != 2 + +/* Registers for entry into PLT on PPC64. */ +typedef struct La_ppc64_regs +{ + uint64_t lr_reg[8]; + double lr_fp[13]; + uint32_t __padding; + uint32_t lr_vrsave; + uint32_t lr_vreg[12][4]; + uint64_t lr_r1; + uint64_t lr_lr; +} La_ppc64_regs; + +/* Return values for calls from PLT on PPC64. */ +typedef struct La_ppc64_retval +{ + uint64_t lrv_r3; + uint64_t lrv_r4; + double lrv_fp[4]; /* f1-f4, float - complex long double. */ + uint32_t lrv_v2[4]; /* v2. */ +} La_ppc64_retval; + + +__BEGIN_DECLS + +extern Elf64_Addr la_ppc64_gnu_pltenter (Elf64_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_ppc64_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_ppc64_gnu_pltexit (Elf64_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_ppc64_regs *__inregs, + La_ppc64_retval *__outregs, + const char *__symname); + +__END_DECLS + +# else + +/* Registers for entry into PLT on PPC64 in the ELFv2 ABI. */ +typedef struct La_ppc64v2_regs +{ + uint64_t lr_reg[8]; + double lr_fp[13]; + uint32_t __padding; + uint32_t lr_vrsave; + uint32_t lr_vreg[12][4] __attribute__ ((aligned (16))); + uint64_t lr_r1; + uint64_t lr_lr; +} La_ppc64v2_regs; + +/* Return values for calls from PLT on PPC64 in the ELFv2 ABI. */ +typedef struct La_ppc64v2_retval +{ + uint64_t lrv_r3; + uint64_t lrv_r4; + double lrv_fp[10]; + uint32_t lrv_vreg[8][4] __attribute__ ((aligned (16))); +} La_ppc64v2_retval; + + +__BEGIN_DECLS + +extern Elf64_Addr la_ppc64v2_gnu_pltenter (Elf64_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_ppc64v2_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_ppc64v2_gnu_pltexit (Elf64_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_ppc64v2_regs *__inregs, + La_ppc64v2_retval *__outregs, + const char *__symname); + +__END_DECLS + +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/bits/mathinline.h b/REORG.TODO/sysdeps/powerpc/bits/mathinline.h new file mode 100644 index 0000000000..e5f0cd30f2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/mathinline.h @@ -0,0 +1,132 @@ +/* Inline math functions for powerpc. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/mathinline.h> directly; include <math.h> instead." +#endif + +#ifndef __extern_inline +# define __MATH_INLINE __inline +#else +# define __MATH_INLINE __extern_inline +#endif /* __cplusplus */ + +#if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__ + +#ifdef __USE_ISOC99 +# if !__GNUC_PREREQ (2,97) +# define __unordered_cmp(x, y) \ + (__extension__ \ + ({ __typeof__(x) __x = (x); __typeof__(y) __y = (y); \ + unsigned __r; \ + __asm__("fcmpu 7,%1,%2 ; mfcr %0" : "=r" (__r) : "f" (__x), "f"(__y) \ + : "cr7"); \ + __r; })) + +# undef isgreater +# undef isgreaterequal +# undef isless +# undef islessequal +# undef islessgreater +# undef isunordered + +# define isgreater(x, y) (__unordered_cmp (x, y) >> 2 & 1) +# define isgreaterequal(x, y) ((__unordered_cmp (x, y) & 6) != 0) +# define isless(x, y) (__unordered_cmp (x, y) >> 3 & 1) +# define islessequal(x, y) ((__unordered_cmp (x, y) & 0xA) != 0) +# define islessgreater(x, y) ((__unordered_cmp (x, y) & 0xC) != 0) +# define isunordered(x, y) (__unordered_cmp (x, y) & 1) + +# endif /* __GNUC_PREREQ (2,97) */ + +/* The gcc, version 2.7 or below, has problems with all this inlining + code. So disable it for this version of the compiler. */ +# if __GNUC_PREREQ (2, 8) +/* Test for negative number. Used in the signbit() macro. */ +__MATH_INLINE int +__NTH (__signbitf (float __x)) +{ +#if __GNUC_PREREQ (4, 0) + return __builtin_signbitf (__x); +#else + __extension__ union { float __f; int __i; } __u = { __f: __x }; + return __u.__i < 0; +#endif +} +__MATH_INLINE int +__NTH (__signbit (double __x)) +{ +#if __GNUC_PREREQ (4, 0) + return __builtin_signbit (__x); +#else + __extension__ union { double __d; long long __i; } __u = { __d: __x }; + return __u.__i < 0; +#endif +} +# ifdef __LONG_DOUBLE_128__ +__MATH_INLINE int +__NTH (__signbitl (long double __x)) +{ + return __signbit ((double) __x); +} +# endif +# endif +#endif /* __USE_ISOC99 */ + +#if !defined __NO_MATH_INLINES && defined __OPTIMIZE__ + +#ifdef __USE_ISOC99 + +# ifndef __powerpc64__ +__MATH_INLINE long int lrint (double __x) __THROW; +__MATH_INLINE long int +__NTH (lrint (double __x)) +{ + union { + double __d; + long long __ll; + } __u; + __asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x)); + return __u.__ll; +} + +__MATH_INLINE long int lrintf (float __x) __THROW; +__MATH_INLINE long int +__NTH (lrintf (float __x)) +{ + return lrint ((double) __x); +} +# endif + +__MATH_INLINE double fdim (double __x, double __y) __THROW; +__MATH_INLINE double +__NTH (fdim (double __x, double __y)) +{ + return __x <= __y ? 0 : __x - __y; +} + +__MATH_INLINE float fdimf (float __x, float __y) __THROW; +__MATH_INLINE float +__NTH (fdimf (float __x, float __y)) +{ + return __x <= __y ? 0 : __x - __y; +} + +#endif /* __USE_ISOC99 */ +#endif /* !__NO_MATH_INLINES && __OPTIMIZE__ */ +#endif /* __GNUC__ && !_SOFT_FLOAT && !__NO_FPRS__ */ diff --git a/REORG.TODO/sysdeps/powerpc/bits/setjmp.h b/REORG.TODO/sysdeps/powerpc/bits/setjmp.h new file mode 100644 index 0000000000..02568951e9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/setjmp.h @@ -0,0 +1,50 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define the machine-dependent type `jmp_buf'. PowerPC version. */ +#ifndef _BITS_SETJMP_H +#define _BITS_SETJMP_H 1 + +#if !defined _SETJMP_H && !defined _PTHREAD_H +# error "Never include <bits/setjmp.h> directly; use <setjmp.h> instead." +#endif + +/* The previous bits/setjmp.h had __jmp_buf defined as a structure. + We use an array of 'long int' instead, to make writing the + assembler easier. Naturally, user code should not depend on + either representation. */ + +#include <bits/wordsize.h> + +/* The current powerpc 32-bit Altivec ABI specifies for SVR4 ABI and EABI + the vrsave must be at byte 248 & v20 at byte 256. So we must pad this + correctly on 32 bit. It also insists that vecregs are only gauranteed + 4 byte alignment so we need to use vperm in the setjmp/longjmp routines. + We have to version the code because members like int __mask_was_saved + in the jmp_buf will move as jmp_buf is now larger than 248 bytes. We + cannot keep the altivec jmp_buf backward compatible with the jmp_buf. */ +#ifndef _ASM +# if __WORDSIZE == 64 +typedef long int __jmp_buf[64] __attribute__ ((__aligned__ (16))); +# else +/* The alignment is not essential, i.e.the buffer can be copied to a 4 byte + aligned buffer as per the ABI it is just added for performance reasons. */ +typedef long int __jmp_buf[64 + (12 * 4)] __attribute__ ((__aligned__ (16))); +# endif +#endif + +#endif /* bits/setjmp.h */ diff --git a/REORG.TODO/sysdeps/powerpc/dl-procinfo.c b/REORG.TODO/sysdeps/powerpc/dl-procinfo.c new file mode 100644 index 0000000000..cd7329b84e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/dl-procinfo.c @@ -0,0 +1,77 @@ +/* Data for processor capability information. PowerPC version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This information must be kept in sync with the _DL_HWCAP_COUNT + definition in procinfo.h. + + If anything should be added here check whether the size of each string + is still ok with the given array size. + + All the #ifdefs in the definitions are quite irritating but + necessary if we want to avoid duplicating the information. There + are three different modes: + + - PROCINFO_DECL is defined. This means we are only interested in + declarations. + + - PROCINFO_DECL is not defined: + + + if SHARED is defined the file is included in an array + initializer. The .element = { ... } syntax is needed. + + + if SHARED is not defined a normal array initialization is + needed. + */ + +#ifndef PROCINFO_CLASS +# define PROCINFO_CLASS +#endif + +#if !defined PROCINFO_DECL && defined SHARED + ._dl_powerpc_cap_flags +#else +PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][10] +#endif +#ifndef PROCINFO_DECL += { + "ppcle", "true_le", "", "", + "", "", "archpmu", "vsx", + "arch_2_06", "power6x", "dfp", "pa6t", + "arch_2_05", "ic_snoop", "smt", "booke", + "cellbe", "power5+", "power5", "power4", + "notb", "efpdouble", "efpsingle", "spe", + "ucache", "4xxmac", "mmu", "fpu", + "altivec", "ppc601", "ppc64", "ppc32", + "", "", "", "", + "", "", "", "", + "", "", "", "", + "", "", "", "", + "", "", "", "", + "", "", "ieee128", "arch_3_00", + "htm-nosc", "vcrypto", "tar", "isel", + "ebb", "dscr", "htm", "arch_2_07", + } +#endif +#if !defined SHARED || defined PROCINFO_DECL +; +#else +, +#endif + +#undef PROCINFO_DECL +#undef PROCINFO_CLASS diff --git a/REORG.TODO/sysdeps/powerpc/dl-procinfo.h b/REORG.TODO/sysdeps/powerpc/dl-procinfo.h new file mode 100644 index 0000000000..216d20fbff --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/dl-procinfo.h @@ -0,0 +1,185 @@ +/* Processor capability information handling macros. PowerPC version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _DL_PROCINFO_H +#define _DL_PROCINFO_H 1 + +#include <ldsodefs.h> +#include <sysdep.h> /* This defines the PPC_FEATURE[2]_* macros. */ + +/* The total number of available bits (including those prior to + _DL_HWCAP_FIRST). Some of these bits might not be used. */ +#define _DL_HWCAP_COUNT 64 + +/* Features started at bit 31 and decremented as new features were added. */ +#define _DL_HWCAP_LAST 31 + +/* AT_HWCAP2 features started at bit 31 and decremented as new features were + added. HWCAP2 feature bits start at bit 0. */ +#define _DL_HWCAP2_LAST 31 + +/* These bits influence library search. */ +#define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \ + + PPC_FEATURE_HAS_DFP) + +#define _DL_PLATFORMS_COUNT 15 + +#define _DL_FIRST_PLATFORM 32 +/* Mask to filter out platforms. */ +#define _DL_HWCAP_PLATFORM (((1ULL << _DL_PLATFORMS_COUNT) - 1) \ + << _DL_FIRST_PLATFORM) + +/* Platform bits (relative to _DL_FIRST_PLATFORM). */ +#define PPC_PLATFORM_POWER4 0 +#define PPC_PLATFORM_PPC970 1 +#define PPC_PLATFORM_POWER5 2 +#define PPC_PLATFORM_POWER5_PLUS 3 +#define PPC_PLATFORM_POWER6 4 +#define PPC_PLATFORM_CELL_BE 5 +#define PPC_PLATFORM_POWER6X 6 +#define PPC_PLATFORM_POWER7 7 +#define PPC_PLATFORM_PPCA2 8 +#define PPC_PLATFORM_PPC405 9 +#define PPC_PLATFORM_PPC440 10 +#define PPC_PLATFORM_PPC464 11 +#define PPC_PLATFORM_PPC476 12 +#define PPC_PLATFORM_POWER8 13 +#define PPC_PLATFORM_POWER9 14 + +static inline const char * +__attribute__ ((unused)) +_dl_hwcap_string (int idx) +{ + return GLRO(dl_powerpc_cap_flags)[idx]; +} + +static inline int +__attribute__ ((unused)) +_dl_string_hwcap (const char *str) +{ + for (int i = 0; i < _DL_HWCAP_COUNT; ++i) + if (strcmp (str, _dl_hwcap_string (i)) == 0) + return i; + return -1; +} + +static inline int +__attribute__ ((unused, always_inline)) +_dl_string_platform (const char *str) +{ + if (str == NULL) + return -1; + + if (strncmp (str, "power", 5) == 0) + { + int ret; + str += 5; + switch (*str) + { + case '4': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER4; + break; + case '5': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER5; + if (str[1] == '+') + { + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER5_PLUS; + ++str; + } + break; + case '6': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER6; + if (str[1] == 'x') + { + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER6X; + ++str; + } + break; + case '7': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER7; + break; + case '8': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER8; + break; + case '9': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER9; + break; + default: + return -1; + } + if (str[1] == '\0') + return ret; + } + else if (strncmp (str, "ppc", 3) == 0) + { + if (strcmp (str + 3, "970") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC970; + else if (strcmp (str + 3, "-cell-be") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_CELL_BE; + else if (strcmp (str + 3, "a2") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPCA2; + else if (strcmp (str + 3, "405") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC405; + else if (strcmp (str + 3, "440") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC440; + else if (strcmp (str + 3, "464") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC464; + else if (strcmp (str + 3, "476") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC476; + } + + return -1; +} + +#if IS_IN (rtld) +static inline int +__attribute__ ((unused)) +_dl_procinfo (unsigned int type, unsigned long int word) +{ + switch(type) + { + case AT_HWCAP: + _dl_printf ("AT_HWCAP: "); + + for (int i = 0; i <= _DL_HWCAP_LAST; ++i) + if (word & (1 << i)) + _dl_printf (" %s", _dl_hwcap_string (i)); + break; + case AT_HWCAP2: + { + unsigned int offset = _DL_HWCAP_LAST + 1; + + _dl_printf ("AT_HWCAP2: "); + + /* We have to go through them all because the kernel added the + AT_HWCAP2 features starting with the high bits. */ + for (int i = 0; i <= _DL_HWCAP2_LAST; ++i) + if (word & (1 << i)) + _dl_printf (" %s", _dl_hwcap_string (offset + i)); + break; + } + default: + /* This should not happen. */ + return -1; + } + _dl_printf ("\n"); + return 0; +} +#endif + +#endif /* dl-procinfo.h */ diff --git a/REORG.TODO/sysdeps/powerpc/dl-tls.c b/REORG.TODO/sysdeps/powerpc/dl-tls.c new file mode 100644 index 0000000000..f666d53b49 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/dl-tls.c @@ -0,0 +1,24 @@ +/* Thread-local storage handling in the ELF dynamic linker. PowerPC version. + Copyright (C) 2009-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include "elf/dl-tls.c" + +#ifdef SHARED +strong_alias(__tls_get_addr, __tls_get_addr_opt) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/dl-tls.h b/REORG.TODO/sysdeps/powerpc/dl-tls.h new file mode 100644 index 0000000000..54beaee5ae --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/dl-tls.h @@ -0,0 +1,52 @@ +/* Thread-local storage handling in the ELF dynamic linker. PowerPC version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _PPC_DL_TLS_H +# define _PPC_DL_TLS_H 1 + +/* Type used for the representation of TLS information in the TOC. */ +typedef struct +{ + unsigned long int ti_module; + unsigned long int ti_offset; +} tls_index; + +/* The thread pointer points 0x7000 past the first static TLS block. */ +#define TLS_TP_OFFSET 0x7000 + +/* Dynamic thread vector pointers point 0x8000 past the start of each + TLS block. */ +#define TLS_DTV_OFFSET 0x8000 + +/* Compute the value for a @tprel reloc. */ +#define TLS_TPREL_VALUE(sym_map, sym, reloc) \ + ((sym_map)->l_tls_offset + (sym)->st_value + (reloc)->r_addend \ + - TLS_TP_OFFSET) + +/* Compute the value for a @dtprel reloc. */ +#define TLS_DTPREL_VALUE(sym, reloc) \ + ((sym)->st_value + (reloc)->r_addend - TLS_DTV_OFFSET) + +#ifdef SHARED +extern void *__tls_get_addr (tls_index *ti); + +# define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) +# define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) +#endif + +#endif /* dl-tls.h */ diff --git a/REORG.TODO/sysdeps/powerpc/ffs.c b/REORG.TODO/sysdeps/powerpc/ffs.c new file mode 100644 index 0000000000..125683b0f9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/ffs.c @@ -0,0 +1,47 @@ +/* Find first set bit in a word, counted from least significant end. + For PowerPC. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund (tege@sics.se). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define ffsl __something_else +#include <limits.h> +#include <string.h> + +#undef ffs + +#ifdef __GNUC__ + +int +__ffs (int x) +{ + int cnt; + + asm ("cntlzw %0,%1" : "=r" (cnt) : "r" (x & -x)); + return 32 - cnt; +} +weak_alias (__ffs, ffs) +libc_hidden_def (__ffs) +libc_hidden_builtin_def (ffs) +#if ULONG_MAX == UINT_MAX +#undef ffsl +weak_alias (__ffs, ffsl) +#endif + +#else +#include <string/ffs.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/Makefile b/REORG.TODO/sysdeps/powerpc/fpu/Makefile new file mode 100644 index 0000000000..53470a9cf2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/Makefile @@ -0,0 +1,7 @@ +ifeq ($(subdir),math) +libm-support += fenv_const fe_nomask fe_mask t_sqrt +endif + +ifeq ($(subdir),stdlib) +tests += tst-setcontext-fpscr +endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/e_hypot.c b/REORG.TODO/sysdeps/powerpc/fpu/e_hypot.c new file mode 100644 index 0000000000..2685ca6ba0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/e_hypot.c @@ -0,0 +1,134 @@ +/* Pythagorean addition using doubles + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <stdint.h> + +static const double two60 = 1.152921504606847e+18; +static const double two500 = 3.2733906078961419e+150; +static const double two600 = 4.149515568880993e+180; +static const double two1022 = 4.49423283715579e+307; +static const double twoM500 = 3.054936363499605e-151; +static const double twoM600 = 2.4099198651028841e-181; +static const double two60factor = 1.5592502418239997e+290; +static const double pdnum = 2.225073858507201e-308; + +/* __ieee754_hypot(x,y) + * + * This a FP only version without any FP->INT conversion. + * It is similar to default C version, making appropriates + * overflow and underflows checks as well scaling when it + * is needed. + */ + +#ifdef _ARCH_PWR7 +/* POWER7 isinf and isnan optimization are fast. */ +# define TEST_INF_NAN(x, y) \ + if ((isinf(x) || isinf(y)) \ + && !issignaling (x) && !issignaling (y)) \ + return INFINITY; \ + if (isnan(x) || isnan(y)) \ + return x + y; +# else +/* For POWER6 and below isinf/isnan triggers LHS and PLT calls are + * costly (especially for POWER6). */ +# define GET_TW0_HIGH_WORD(d1,d2,i1,i2) \ + do { \ + ieee_double_shape_type gh_u1; \ + ieee_double_shape_type gh_u2; \ + gh_u1.value = (d1); \ + gh_u2.value = (d2); \ + (i1) = gh_u1.parts.msw & 0x7fffffff; \ + (i2) = gh_u2.parts.msw & 0x7fffffff; \ + } while (0) + +# define TEST_INF_NAN(x, y) \ + do { \ + uint32_t hx, hy; \ + GET_TW0_HIGH_WORD(x, y, hx, hy); \ + if (hy > hx) { \ + uint32_t ht = hx; hx = hy; hy = ht; \ + } \ + if (hx >= 0x7ff00000) { \ + if ((hx == 0x7ff00000 || hy == 0x7ff00000) \ + && !issignaling (x) && !issignaling (y)) \ + return INFINITY; \ + return x + y; \ + } \ + } while (0) + +#endif + + +double +__ieee754_hypot (double x, double y) +{ + x = fabs (x); + y = fabs (y); + + TEST_INF_NAN (x, y); + + if (y > x) + { + double t = x; + x = y; + y = t; + } + if (y == 0.0) + return x; + /* if y is higher enough, y * 2^60 might overflow. The tests if + y >= 1.7976931348623157e+308/2^60 (two60factor) and uses the + appropriate check to avoid the overflow exception generation. */ + if (y > two60factor) + { + if ((x / y) > two60) + return x + y; + } + else + { + if (x > (y * two60)) + return x + y; + } + if (x > two500) + { + x *= twoM600; + y *= twoM600; + return __ieee754_sqrt (x * x + y * y) / twoM600; + } + if (y < twoM500) + { + if (y <= pdnum) + { + x *= two1022; + y *= two1022; + double ret = __ieee754_sqrt (x * x + y * y) / two1022; + math_check_force_underflow_nonneg (ret); + return ret; + } + else + { + x *= two600; + y *= two600; + return __ieee754_sqrt (x * x + y * y) / two600; + } + } + return __ieee754_sqrt (x * x + y * y); +} +strong_alias (__ieee754_hypot, __hypot_finite) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/e_hypotf.c b/REORG.TODO/sysdeps/powerpc/fpu/e_hypotf.c new file mode 100644 index 0000000000..8502ca962a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/e_hypotf.c @@ -0,0 +1,76 @@ +/* Pythagorean addition using floats + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <stdint.h> + +/* __ieee754_hypotf(x,y) + + This a FP only version without any FP->INT conversion. + It is similar to default C version, making appropriates + overflow and underflows checks as using double precision + instead of scaling. */ + +#ifdef _ARCH_PWR7 +/* POWER7 isinf and isnan optimizations are fast. */ +# define TEST_INF_NAN(x, y) \ + if ((isinff(x) || isinff(y)) \ + && !issignaling (x) && !issignaling (y)) \ + return INFINITY; \ + if (isnanf(x) || isnanf(y)) \ + return x + y; +# else +/* For POWER6 and below isinf/isnan triggers LHS and PLT calls are + * costly (especially for POWER6). */ +# define GET_TWO_FLOAT_WORD(f1,f2,i1,i2) \ + do { \ + ieee_float_shape_type gf_u1; \ + ieee_float_shape_type gf_u2; \ + gf_u1.value = (f1); \ + gf_u2.value = (f2); \ + (i1) = gf_u1.word & 0x7fffffff; \ + (i2) = gf_u2.word & 0x7fffffff; \ + } while (0) + +# define TEST_INF_NAN(x, y) \ + do { \ + uint32_t hx, hy; \ + GET_TWO_FLOAT_WORD(x, y, hx, hy); \ + if (hy > hx) { \ + uint32_t ht = hx; hx = hy; hy = ht; \ + } \ + if (hx >= 0x7f800000) { \ + if ((hx == 0x7f800000 || hy == 0x7f800000) \ + && !issignaling (x) && !issignaling (y)) \ + return INFINITY; \ + return x + y; \ + } \ + } while (0) +#endif + + +float +__ieee754_hypotf (float x, float y) +{ + TEST_INF_NAN (x, y); + + return __ieee754_sqrt ((double) x * x + (double) y * y); +} +strong_alias (__ieee754_hypotf, __hypotf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/e_rem_pio2f.c b/REORG.TODO/sysdeps/powerpc/fpu/e_rem_pio2f.c new file mode 100644 index 0000000000..8563e7c5e4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/e_rem_pio2f.c @@ -0,0 +1,188 @@ +/* e_rem_pio2f.c -- float version of e_rem_pio2.c + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#include <math_private.h> +#include "s_float_bitwise.h" + +/* defined in sysdeps/powerpc/fpu/k_rem_pio2f.c */ +int __fp_kernel_rem_pio2f (float *x, float *y, float e0, int32_t nx); + +/* __ieee754_rem_pio2f(x,y) + * + * return the remainder of x rem pi/2 in y[0]+y[1] + */ + +static const float npio2_hw[] = { + 1.57077026e+00, 3.14154053e+00, 4.71228027e+00, 6.28308105e+00, + 7.85388184e+00, 9.42456055e+00, 1.09953613e+01, 1.25661621e+01, + 1.41369629e+01, 1.57077637e+01, 1.72783203e+01, 1.88491211e+01, + 2.04199219e+01, 2.19907227e+01, 2.35615234e+01, 2.51323242e+01, + 2.67031250e+01, 2.82739258e+01, 2.98447266e+01, 3.14155273e+01, + 3.29863281e+01, 3.45566406e+01, 3.61279297e+01, 3.76982422e+01, + 3.92695312e+01, 4.08398438e+01, 4.24111328e+01, 4.39814453e+01, + 4.55527344e+01, 4.71230469e+01, 4.86943359e+01, 5.02646484e+01 +}; + + +static const float zero = 0.0000000000e+00; +static const float two8 = 2.5600000000e+02; + +static const float half = 5.0000000000e-01; +static const float invpio2 = 6.3661980629e-01; +static const float pio2_1 = 1.5707855225e+00; +static const float pio2_1t = 1.0804334124e-05; +static const float pio2_2 = 1.0804273188e-05; +static const float pio2_2t = 6.0770999344e-11; +static const float pio2_3 = 6.0770943833e-11; +static const float pio2_3t = 6.1232342629e-17; + +static const float pio4 = 7.8539801e-01; +static const float pio3_4 = 2.3561945e+00; +static const float pio2_24b = 1.5707951e+00; +static const float pio2_2e7 = 2.0106054e+02; + + +int32_t +__ieee754_rem_pio2f (float x, float *y) +{ + float ax, z, n, r, w, t, e0; + float tx[3]; + int32_t i, nx; + + ax = __builtin_fabsf (x); + if (ax <= pio4) + { + y[0] = x; + y[1] = 0; + return 0; + } + if (ax < pio3_4) + { + if (x > 0) + { + z = x - pio2_1; + if (!__float_and_test28 (ax, pio2_24b)) + { + y[0] = z - pio2_1t; + y[1] = (z - y[0]) - pio2_1t; + } + else + { + z -= pio2_2; + y[0] = z - pio2_2t; + y[1] = (z - y[0]) - pio2_2t; + } + return 1; + } + else + { + z = x + pio2_1; + if (!__float_and_test28 (ax, pio2_24b)) + { + y[0] = z + pio2_1t; + y[1] = (z - y[0]) + pio2_1t; + } + else + { + z += pio2_2; + y[0] = z + pio2_2t; + y[1] = (z - y[0]) + pio2_2t; + } + return -1; + } + } + if (ax <= pio2_2e7) + { + n = __floorf (ax * invpio2 + half); + i = (int32_t) n; + r = ax - n * pio2_1; + w = n * pio2_1t; /* 1st round good to 40 bit */ + if (i < 32 && !__float_and_test24 (ax, npio2_hw[i - 1])) + { + y[0] = r - w; + } + else + { + float i, j; + j = __float_and8 (ax); + y[0] = r - w; + i = __float_and8 (y[0]); + if (j / i > 256.0 || j / i < 3.9062500e-3) + { /* 2nd iterations needed, good to 57 */ + t = r; + w = n * pio2_2; + r = t - w; + w = n * pio2_2t - ((t - r) - w); + y[0] = r - w; + i = __float_and8 (y[0]); + if (j / i > 33554432 || j / i < 2.9802322e-8) + { /* 3rd iteration needed, 74 bits acc */ + t = r; + w = n * pio2_3; + r = t - w; + w = n * pio2_3t - ((t - r) - w); + y[0] = r - w; + } + } + } + y[1] = (r - y[0]) - w; + if (x < 0) + { + y[0] = -y[0]; + y[1] = -y[1]; + return -i; + } + else + { + return i; + } + } + + /* all other (large) arguments */ + if (isnanf (x) || isinff (x)) + { + y[0] = y[1] = x - x; + return 0; + } + + /* set z = scalbn(|x|,ilogb(x)-7) */ + e0 = __float_and8 (ax / 128.0); + z = ax / e0; + + tx[0] = __floorf (z); + z = (z - tx[0]) * two8; + tx[1] = __floorf (z); + z = (z - tx[1]) * two8; + tx[2] = __floorf (z); + + nx = 3; + while (tx[nx - 1] == zero) + nx--; + + i = __fp_kernel_rem_pio2f (tx, y, e0, nx); + if (x < 0) + { + y[0] = -y[0]; + y[1] = -y[1]; + return -i; + } + return i; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/e_sqrt.c b/REORG.TODO/sysdeps/powerpc/fpu/e_sqrt.c new file mode 100644 index 0000000000..1c8977d6ad --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/e_sqrt.c @@ -0,0 +1,175 @@ +/* Double-precision floating point square root. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <fenv_libc.h> +#include <inttypes.h> +#include <stdint.h> +#include <sysdep.h> +#include <ldsodefs.h> + +#ifndef _ARCH_PPCSQ +static const double almost_half = 0.5000000000000001; /* 0.5 + 2^-53 */ +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float two108 = 3.245185536584267269e+32; +static const float twom54 = 5.551115123125782702e-17; +extern const float __t_sqrt[1024]; + +/* The method is based on a description in + Computation of elementary functions on the IBM RISC System/6000 processor, + P. W. Markstein, IBM J. Res. Develop, 34(1) 1990. + Basically, it consists of two interleaved Newton-Raphson approximations, + one to find the actual square root, and one to find its reciprocal + without the expense of a division operation. The tricky bit here + is the use of the POWER/PowerPC multiply-add operation to get the + required accuracy with high speed. + + The argument reduction works by a combination of table lookup to + obtain the initial guesses, and some careful modification of the + generated guesses (which mostly runs on the integer unit, while the + Newton-Raphson is running on the FPU). */ + +double +__slow_ieee754_sqrt (double x) +{ + const float inf = a_inf.value; + + if (x > 0) + { + /* schedule the EXTRACT_WORDS to get separation between the store + and the load. */ + ieee_double_shape_type ew_u; + ieee_double_shape_type iw_u; + ew_u.value = (x); + if (x != inf) + { + /* Variables named starting with 's' exist in the + argument-reduced space, so that 2 > sx >= 0.5, + 1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... . + Variables named ending with 'i' are integer versions of + floating-point values. */ + double sx; /* The value of which we're trying to find the + square root. */ + double sg, g; /* Guess of the square root of x. */ + double sd, d; /* Difference between the square of the guess and x. */ + double sy; /* Estimate of 1/2g (overestimated by 1ulp). */ + double sy2; /* 2*sy */ + double e; /* Difference between y*g and 1/2 (se = e * fsy). */ + double shx; /* == sx * fsg */ + double fsg; /* sg*fsg == g. */ + fenv_t fe; /* Saved floating-point environment (stores rounding + mode and whether the inexact exception is + enabled). */ + uint32_t xi0, xi1, sxi, fsgi; + const float *t_sqrt; + + fe = fegetenv_register (); + /* complete the EXTRACT_WORDS (xi0,xi1,x) operation. */ + xi0 = ew_u.parts.msw; + xi1 = ew_u.parts.lsw; + relax_fenv_state (); + sxi = (xi0 & 0x3fffffff) | 0x3fe00000; + /* schedule the INSERT_WORDS (sx, sxi, xi1) to get separation + between the store and the load. */ + iw_u.parts.msw = sxi; + iw_u.parts.lsw = xi1; + t_sqrt = __t_sqrt + (xi0 >> (52 - 32 - 8 - 1) & 0x3fe); + sg = t_sqrt[0]; + sy = t_sqrt[1]; + /* complete the INSERT_WORDS (sx, sxi, xi1) operation. */ + sx = iw_u.value; + + /* Here we have three Newton-Raphson iterations each of a + division and a square root and the remainder of the + argument reduction, all interleaved. */ + sd = -__builtin_fma (sg, sg, -sx); + fsgi = (xi0 + 0x40000000) >> 1 & 0x7ff00000; + sy2 = sy + sy; + sg = __builtin_fma (sy, sd, sg); /* 16-bit approximation to + sqrt(sx). */ + + /* schedule the INSERT_WORDS (fsg, fsgi, 0) to get separation + between the store and the load. */ + INSERT_WORDS (fsg, fsgi, 0); + iw_u.parts.msw = fsgi; + iw_u.parts.lsw = (0); + e = -__builtin_fma (sy, sg, -almost_half); + sd = -__builtin_fma (sg, sg, -sx); + if ((xi0 & 0x7ff00000) == 0) + goto denorm; + sy = __builtin_fma (e, sy2, sy); + sg = __builtin_fma (sy, sd, sg); /* 32-bit approximation to + sqrt(sx). */ + sy2 = sy + sy; + /* complete the INSERT_WORDS (fsg, fsgi, 0) operation. */ + fsg = iw_u.value; + e = -__builtin_fma (sy, sg, -almost_half); + sd = -__builtin_fma (sg, sg, -sx); + sy = __builtin_fma (e, sy2, sy); + shx = sx * fsg; + sg = __builtin_fma (sy, sd, sg); /* 64-bit approximation to + sqrt(sx), but perhaps + rounded incorrectly. */ + sy2 = sy + sy; + g = sg * fsg; + e = -__builtin_fma (sy, sg, -almost_half); + d = -__builtin_fma (g, sg, -shx); + sy = __builtin_fma (e, sy2, sy); + fesetenv_register (fe); + return __builtin_fma (sy, d, g); + denorm: + /* For denormalised numbers, we normalise, calculate the + square root, and return an adjusted result. */ + fesetenv_register (fe); + return __slow_ieee754_sqrt (x * two108) * twom54; + } + } + else if (x < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + __feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + __feraiseexcept (FE_INVALID); + x = a_nan.value; + } + return f_wash (x); +} +#endif /* _ARCH_PPCSQ */ + +#undef __ieee754_sqrt +double +__ieee754_sqrt (double x) +{ + double z; + +#ifdef _ARCH_PPCSQ + asm ("fsqrt %0,%1\n" :"=f" (z):"f" (x)); +#else + z = __slow_ieee754_sqrt (x); +#endif + + return z; +} +strong_alias (__ieee754_sqrt, __sqrt_finite) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/e_sqrtf.c b/REORG.TODO/sysdeps/powerpc/fpu/e_sqrtf.c new file mode 100644 index 0000000000..65d27b4d42 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/e_sqrtf.c @@ -0,0 +1,150 @@ +/* Single-precision floating point square root. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <fenv_libc.h> +#include <inttypes.h> +#include <stdint.h> +#include <sysdep.h> +#include <ldsodefs.h> + +#ifndef _ARCH_PPCSQ +static const float almost_half = 0.50000006; /* 0.5 + 2^-24 */ +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float two48 = 281474976710656.0; +static const float twom24 = 5.9604644775390625e-8; +extern const float __t_sqrt[1024]; + +/* The method is based on a description in + Computation of elementary functions on the IBM RISC System/6000 processor, + P. W. Markstein, IBM J. Res. Develop, 34(1) 1990. + Basically, it consists of two interleaved Newton-Raphson approximations, + one to find the actual square root, and one to find its reciprocal + without the expense of a division operation. The tricky bit here + is the use of the POWER/PowerPC multiply-add operation to get the + required accuracy with high speed. + + The argument reduction works by a combination of table lookup to + obtain the initial guesses, and some careful modification of the + generated guesses (which mostly runs on the integer unit, while the + Newton-Raphson is running on the FPU). */ + +float +__slow_ieee754_sqrtf (float x) +{ + const float inf = a_inf.value; + + if (x > 0) + { + if (x != inf) + { + /* Variables named starting with 's' exist in the + argument-reduced space, so that 2 > sx >= 0.5, + 1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... . + Variables named ending with 'i' are integer versions of + floating-point values. */ + float sx; /* The value of which we're trying to find the square + root. */ + float sg, g; /* Guess of the square root of x. */ + float sd, d; /* Difference between the square of the guess and x. */ + float sy; /* Estimate of 1/2g (overestimated by 1ulp). */ + float sy2; /* 2*sy */ + float e; /* Difference between y*g and 1/2 (note that e==se). */ + float shx; /* == sx * fsg */ + float fsg; /* sg*fsg == g. */ + fenv_t fe; /* Saved floating-point environment (stores rounding + mode and whether the inexact exception is + enabled). */ + uint32_t xi, sxi, fsgi; + const float *t_sqrt; + + GET_FLOAT_WORD (xi, x); + fe = fegetenv_register (); + relax_fenv_state (); + sxi = (xi & 0x3fffffff) | 0x3f000000; + SET_FLOAT_WORD (sx, sxi); + t_sqrt = __t_sqrt + (xi >> (23 - 8 - 1) & 0x3fe); + sg = t_sqrt[0]; + sy = t_sqrt[1]; + + /* Here we have three Newton-Raphson iterations each of a + division and a square root and the remainder of the + argument reduction, all interleaved. */ + sd = -__builtin_fmaf (sg, sg, -sx); + fsgi = (xi + 0x40000000) >> 1 & 0x7f800000; + sy2 = sy + sy; + sg = __builtin_fmaf (sy, sd, sg); /* 16-bit approximation to + sqrt(sx). */ + e = -__builtin_fmaf (sy, sg, -almost_half); + SET_FLOAT_WORD (fsg, fsgi); + sd = -__builtin_fmaf (sg, sg, -sx); + sy = __builtin_fmaf (e, sy2, sy); + if ((xi & 0x7f800000) == 0) + goto denorm; + shx = sx * fsg; + sg = __builtin_fmaf (sy, sd, sg); /* 32-bit approximation to + sqrt(sx), but perhaps + rounded incorrectly. */ + sy2 = sy + sy; + g = sg * fsg; + e = -__builtin_fmaf (sy, sg, -almost_half); + d = -__builtin_fmaf (g, sg, -shx); + sy = __builtin_fmaf (e, sy2, sy); + fesetenv_register (fe); + return __builtin_fmaf (sy, d, g); + denorm: + /* For denormalised numbers, we normalise, calculate the + square root, and return an adjusted result. */ + fesetenv_register (fe); + return __slow_ieee754_sqrtf (x * two48) * twom24; + } + } + else if (x < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + x = a_nan.value; + } + return f_washf (x); +} +#endif /* _ARCH_PPCSQ */ + +#undef __ieee754_sqrtf +float +__ieee754_sqrtf (float x) +{ + double z; + +#ifdef _ARCH_PPCSQ + asm ("fsqrts %0,%1\n" :"=f" (z):"f" (x)); +#else + z = __slow_ieee754_sqrtf (x); +#endif + + return z; +} +strong_alias (__ieee754_sqrtf, __sqrtf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fclrexcpt.c b/REORG.TODO/sysdeps/powerpc/fpu/fclrexcpt.c new file mode 100644 index 0000000000..2ee9547833 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fclrexcpt.c @@ -0,0 +1,49 @@ +/* Clear given exceptions in current floating-point environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +#undef feclearexcept +int +__feclearexcept (int excepts) +{ + fenv_union_t u, n; + + /* Get the current state. */ + u.fenv = fegetenv_register (); + + /* Clear the relevant bits. */ + n.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID) + | (excepts & FPSCR_STICKY_BITS)); + + /* Put the new state in effect. */ + if (u.l != n.l) + fesetenv_register (n.fenv); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feclearexcept, __old_feclearexcept) +compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1); +#endif + +libm_hidden_ver (__feclearexcept, feclearexcept) +versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fe_mask.c b/REORG.TODO/sysdeps/powerpc/fpu/fe_mask.c new file mode 100644 index 0000000000..bbe41a9d92 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fe_mask.c @@ -0,0 +1,32 @@ +/* Procedure definition for FE_MASK_ENV. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <errno.h> + +/* This is a generic stub. An OS specific override is required to clear + the FE0/FE1 bits in the MSR. MSR update is privileged, so this will + normally involve a syscall. */ + +const fenv_t * +__fe_mask_env(void) +{ + __set_errno (ENOSYS); + return FE_DFL_ENV; +} +stub_warning (__fe_mask_env) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fe_nomask.c b/REORG.TODO/sysdeps/powerpc/fpu/fe_nomask.c new file mode 100644 index 0000000000..3b42aedd15 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fe_nomask.c @@ -0,0 +1,32 @@ +/* Procedure definition for FE_NOMASK_ENV. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <errno.h> + +/* This is a generic stub. An OS specific override is required to set + the FE0/FE1 bits in the MSR. MSR update is privileged, so this will + normally involve a syscall. */ + +const fenv_t * +__fe_nomask_env_priv (void) +{ + __set_errno (ENOSYS); + return FE_ENABLED_ENV; +} +stub_warning (__fe_nomask_env_priv) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fedisblxcpt.c b/REORG.TODO/sysdeps/powerpc/fpu/fedisblxcpt.c new file mode 100644 index 0000000000..bb10b4701a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fedisblxcpt.c @@ -0,0 +1,57 @@ +/* Disable floating-point exceptions. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Geoffrey Keating <geoffk@geoffk.org>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fedisableexcept (int excepts) +{ + fenv_union_t fe, curr; + int result, new; + + /* Get current exception mask to return. */ + fe.fenv = curr.fenv = fegetenv_register (); + result = fenv_reg_to_exceptions (fe.l); + + if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID) + excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID; + + /* Sets the new exception mask. */ + if (excepts & FE_INEXACT) + fe.l &= ~(1 << (31 - FPSCR_XE)); + if (excepts & FE_DIVBYZERO) + fe.l &= ~(1 << (31 - FPSCR_ZE)); + if (excepts & FE_UNDERFLOW) + fe.l &= ~(1 << (31 - FPSCR_UE)); + if (excepts & FE_OVERFLOW) + fe.l &= ~(1 << (31 - FPSCR_OE)); + if (excepts & FE_INVALID) + fe.l &= ~(1 << (31 - FPSCR_VE)); + + if (fe.l != curr.l) + fesetenv_register (fe.fenv); + + new = __fegetexcept (); + if (new == 0 && result != 0) + (void)__fe_mask_env (); + + if ((new & excepts) != 0) + result = -1; + return result; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/feenablxcpt.c b/REORG.TODO/sysdeps/powerpc/fpu/feenablxcpt.c new file mode 100644 index 0000000000..7f0b333fc6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/feenablxcpt.c @@ -0,0 +1,58 @@ +/* Enable floating-point exceptions. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Geoffrey Keating <geoffk@geoffk.org>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +feenableexcept (int excepts) +{ + fenv_union_t fe, curr; + int result, new; + + /* Get current exception mask to return. */ + fe.fenv = curr.fenv = fegetenv_register (); + result = fenv_reg_to_exceptions (fe.l); + + if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID) + excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID; + + /* Sets the new exception mask. */ + if (excepts & FE_INEXACT) + fe.l |= (1 << (31 - FPSCR_XE)); + if (excepts & FE_DIVBYZERO) + fe.l |= (1 << (31 - FPSCR_ZE)); + if (excepts & FE_UNDERFLOW) + fe.l |= (1 << (31 - FPSCR_UE)); + if (excepts & FE_OVERFLOW) + fe.l |= (1 << (31 - FPSCR_OE)); + if (excepts & FE_INVALID) + fe.l |= (1 << (31 - FPSCR_VE)); + + if (fe.l != curr.l) + fesetenv_register (fe.fenv); + + new = __fegetexcept (); + if (new != 0 && result == 0) + (void) __fe_nomask_env_priv (); + + if ((new & excepts) != excepts) + result = -1; + + return result; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fegetenv.c b/REORG.TODO/sysdeps/powerpc/fpu/fegetenv.c new file mode 100644 index 0000000000..251977beba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fegetenv.c @@ -0,0 +1,38 @@ +/* Store current floating-point environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fegetenv (fenv_t *envp) +{ + *envp = fegetenv_register (); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetenv, __old_fegetenv) +compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fegetenv) +libm_hidden_ver (__fegetenv, fegetenv) +versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fegetexcept.c b/REORG.TODO/sysdeps/powerpc/fpu/fegetexcept.c new file mode 100644 index 0000000000..2b9899abe2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fegetexcept.c @@ -0,0 +1,43 @@ +/* Get floating-point exceptions. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Geoffrey Keating <geoffk@geoffk.org>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fegetexcept (void) +{ + fenv_union_t fe; + int result = 0; + + fe.fenv = fegetenv_register (); + + if (fe.l & (1 << (31 - FPSCR_XE))) + result |= FE_INEXACT; + if (fe.l & (1 << (31 - FPSCR_ZE))) + result |= FE_DIVBYZERO; + if (fe.l & (1 << (31 - FPSCR_UE))) + result |= FE_UNDERFLOW; + if (fe.l & (1 << (31 - FPSCR_OE))) + result |= FE_OVERFLOW; + if (fe.l & (1 << (31 - FPSCR_VE))) + result |= FE_INVALID; + + return result; +} +weak_alias (__fegetexcept, fegetexcept) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fegetmode.c b/REORG.TODO/sysdeps/powerpc/fpu/fegetmode.c new file mode 100644 index 0000000000..5597000a4b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fegetmode.c @@ -0,0 +1,26 @@ +/* Store current floating-point control modes. PowerPC version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fegetmode (femode_t *modep) +{ + *modep = fegetenv_register (); + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fegetround.c b/REORG.TODO/sysdeps/powerpc/fpu/fegetround.c new file mode 100644 index 0000000000..bedb02f0e5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fegetround.c @@ -0,0 +1,30 @@ +/* Return current rounding direction. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +(__fegetround) (void) +{ + return __fegetround(); +} +#undef fegetround +#undef __fegetround +libm_hidden_def (__fegetround) +weak_alias (__fegetround, fegetround) +libm_hidden_weak (fegetround) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/feholdexcpt.c b/REORG.TODO/sysdeps/powerpc/fpu/feholdexcpt.c new file mode 100644 index 0000000000..fef49c33a9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/feholdexcpt.c @@ -0,0 +1,51 @@ +/* Store current floating-point environment and clear exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <fpu_control.h> +#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_XM | _FPU_MASK_IM) + +int +__feholdexcept (fenv_t *envp) +{ + fenv_union_t old, new; + + /* Save the currently set exceptions. */ + old.fenv = *envp = fegetenv_register (); + + /* Clear everything except for the rounding modes and non-IEEE arithmetic + flag. */ + new.l = old.l & 0xffffffff00000007LL; + + if (new.l == old.l) + return 0; + + /* If the old env had any enabled exceptions, then mask SIGFPE in the + MSR FE0/FE1 bits. This may allow the FPU to run faster because it + always takes the default action and can not generate SIGFPE. */ + if ((old.l & _FPU_MASK_ALL) != 0) + (void)__fe_mask_env (); + + /* Put the new state in effect. */ + fesetenv_register (new.fenv); + + return 0; +} +libm_hidden_def (__feholdexcept) +weak_alias (__feholdexcept, feholdexcept) +libm_hidden_weak (feholdexcept) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fenv_const.c b/REORG.TODO/sysdeps/powerpc/fpu/fenv_const.c new file mode 100644 index 0000000000..c5e088c98e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fenv_const.c @@ -0,0 +1,36 @@ +/* Constants for fenv_bits.h. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* We want to specify the bit pattern of the __fe_*_env constants, so + pretend they're really `long long' instead of `double'. */ + +/* If the default argument is used we use this value. */ +const unsigned long long __fe_dfl_env __attribute__ ((aligned (8))) = +0xfff8000000000000ULL; + +/* The same representation is used for femode_t. */ +extern const unsigned long long __fe_dfl_mode + __attribute__ ((aligned (8), alias ("__fe_dfl_env"))); + +/* Floating-point environment where none of the exceptions are masked. */ +const unsigned long long __fe_enabled_env __attribute__ ((aligned (8))) = +0xfff80000000000f8ULL; + +/* Floating-point environment with the NI bit set. */ +const unsigned long long __fe_nonieee_env __attribute__ ((aligned (8))) = +0xfff8000000000004ULL; diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fenv_libc.h b/REORG.TODO/sysdeps/powerpc/fpu/fenv_libc.h new file mode 100644 index 0000000000..500ac042b4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fenv_libc.h @@ -0,0 +1,176 @@ +/* Internal libc stuff for floating point environment routines. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FENV_LIBC_H +#define _FENV_LIBC_H 1 + +#include <fenv.h> +#include <ldsodefs.h> +#include <sysdep.h> + +extern const fenv_t *__fe_nomask_env_priv (void); + +extern const fenv_t *__fe_mask_env (void) attribute_hidden; + +/* The sticky bits in the FPSCR indicating exceptions have occurred. */ +#define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID) + +/* Equivalent to fegetenv, but returns a fenv_t instead of taking a + pointer. */ +#define fegetenv_register() \ + ({ fenv_t env; asm volatile ("mffs %0" : "=f" (env)); env; }) + +/* Equivalent to fesetenv, but takes a fenv_t instead of a pointer. */ +#define fesetenv_register(env) \ + do { \ + double d = (env); \ + if(GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \ + asm volatile (".machine push; " \ + ".machine \"power6\"; " \ + "mtfsf 0xff,%0,1,0; " \ + ".machine pop" : : "f" (d)); \ + else \ + asm volatile ("mtfsf 0xff,%0" : : "f" (d)); \ + } while(0) + +/* This very handy macro: + - Sets the rounding mode to 'round to nearest'; + - Sets the processor into IEEE mode; and + - Prevents exceptions from being raised for inexact results. + These things happen to be exactly what you need for typical elementary + functions. */ +#define relax_fenv_state() \ + do { \ + if (GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \ + asm (".machine push; .machine \"power6\"; " \ + "mtfsfi 7,0,1; .machine pop"); \ + asm ("mtfsfi 7,0"); \ + } while(0) + +/* Set/clear a particular FPSCR bit (for instance, + reset_fpscr_bit(FPSCR_VE); + prevents INVALID exceptions from being raised). */ +#define set_fpscr_bit(x) asm volatile ("mtfsb1 %0" : : "i"(x)) +#define reset_fpscr_bit(x) asm volatile ("mtfsb0 %0" : : "i"(x)) + +typedef union +{ + fenv_t fenv; + unsigned long long l; +} fenv_union_t; + + +static inline int +__fesetround_inline (int round) +{ + if ((unsigned int) round < 2) + { + asm volatile ("mtfsb0 30"); + if ((unsigned int) round == 0) + asm volatile ("mtfsb0 31"); + else + asm volatile ("mtfsb1 31"); + } + else + { + asm volatile ("mtfsb1 30"); + if ((unsigned int) round == 2) + asm volatile ("mtfsb0 31"); + else + asm volatile ("mtfsb1 31"); + } + + return 0; +} + +/* Definitions of all the FPSCR bit numbers */ +enum { + FPSCR_FX = 0, /* exception summary */ + FPSCR_FEX, /* enabled exception summary */ + FPSCR_VX, /* invalid operation summary */ + FPSCR_OX, /* overflow */ + FPSCR_UX, /* underflow */ + FPSCR_ZX, /* zero divide */ + FPSCR_XX, /* inexact */ + FPSCR_VXSNAN, /* invalid operation for sNaN */ + FPSCR_VXISI, /* invalid operation for Inf-Inf */ + FPSCR_VXIDI, /* invalid operation for Inf/Inf */ + FPSCR_VXZDZ, /* invalid operation for 0/0 */ + FPSCR_VXIMZ, /* invalid operation for Inf*0 */ + FPSCR_VXVC, /* invalid operation for invalid compare */ + FPSCR_FR, /* fraction rounded [fraction was incremented by round] */ + FPSCR_FI, /* fraction inexact */ + FPSCR_FPRF_C, /* result class descriptor */ + FPSCR_FPRF_FL, /* result less than (usually, less than 0) */ + FPSCR_FPRF_FG, /* result greater than */ + FPSCR_FPRF_FE, /* result equal to */ + FPSCR_FPRF_FU, /* result unordered */ + FPSCR_20, /* reserved */ + FPSCR_VXSOFT, /* invalid operation set by software */ + FPSCR_VXSQRT, /* invalid operation for square root */ + FPSCR_VXCVI, /* invalid operation for invalid integer convert */ + FPSCR_VE, /* invalid operation exception enable */ + FPSCR_OE, /* overflow exception enable */ + FPSCR_UE, /* underflow exception enable */ + FPSCR_ZE, /* zero divide exception enable */ + FPSCR_XE, /* inexact exception enable */ +#ifdef _ARCH_PWR6 + FPSCR_29, /* Reserved in ISA 2.05 */ +#else + FPSCR_NI /* non-IEEE mode (typically, no denormalised numbers) */ +#endif /* _ARCH_PWR6 */ + /* the remaining two least-significant bits keep the rounding mode */ +}; + +static inline int +fenv_reg_to_exceptions (unsigned long long l) +{ + int result = 0; + if (l & (1 << (31 - FPSCR_XE))) + result |= FE_INEXACT; + if (l & (1 << (31 - FPSCR_ZE))) + result |= FE_DIVBYZERO; + if (l & (1 << (31 - FPSCR_UE))) + result |= FE_UNDERFLOW; + if (l & (1 << (31 - FPSCR_OE))) + result |= FE_OVERFLOW; + if (l & (1 << (31 - FPSCR_VE))) + result |= FE_INVALID; + return result; +} + +#ifdef _ARCH_PWR6 + /* Not supported in ISA 2.05. Provided for source compat only. */ +# define FPSCR_NI 29 +#endif /* _ARCH_PWR6 */ + +/* This operation (i) sets the appropriate FPSCR bits for its + parameter, (ii) converts sNaN to the corresponding qNaN, and (iii) + otherwise passes its parameter through unchanged (in particular, -0 + and +0 stay as they were). The `obvious' way to do this is optimised + out by gcc. */ +#define f_wash(x) \ + ({ double d; asm volatile ("fmul %0,%1,%2" \ + : "=f"(d) \ + : "f" (x), "f"((float)1.0)); d; }) +#define f_washf(x) \ + ({ float f; asm volatile ("fmuls %0,%1,%2" \ + : "=f"(f) \ + : "f" (x), "f"((float)1.0)); f; }) + +#endif /* fenv_libc.h */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fenv_private.h b/REORG.TODO/sysdeps/powerpc/fpu/fenv_private.h new file mode 100644 index 0000000000..877f25bcf2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fenv_private.h @@ -0,0 +1,229 @@ +/* Private floating point rounding and exceptions handling. PowerPC version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef FENV_PRIVATE_H +#define FENV_PRIVATE_H 1 + +#include <fenv.h> +#include <fenv_libc.h> +#include <fpu_control.h> + +/* Mask for the exception enable bits. */ +#define _FPU_ALL_TRAPS (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \ + | _FPU_MASK_XM | _FPU_MASK_IM) + +/* Mask the rounding mode bits. */ +#define _FPU_MASK_RN (~0x3) + +/* Mask everything but the rounding moded and non-IEEE arithmetic flags. */ +#define _FPU_MASK_NOT_RN_NI 0xffffffff00000007LL + +/* Mask restore rounding mode and exception enabled. */ +#define _FPU_MASK_TRAPS_RN 0xffffffff1fffff00LL + +/* Mask exception enable but fraction rounded/inexact and FP result/CC + bits. */ +#define _FPU_MASK_FRAC_INEX_RET_CC 0xffffffff1ff80fff + +static __always_inline void +__libc_feholdbits_ppc (fenv_t *envp, unsigned long long mask, + unsigned long long bits) +{ + fenv_union_t old, new; + + old.fenv = *envp = fegetenv_register (); + + new.l = (old.l & mask) | bits; + + /* If the old env had any enabled exceptions, then mask SIGFPE in the + MSR FE0/FE1 bits. This may allow the FPU to run faster because it + always takes the default action and can not generate SIGFPE. */ + if ((old.l & _FPU_ALL_TRAPS) != 0) + (void) __fe_mask_env (); + + fesetenv_register (new.fenv); +} + +static __always_inline void +libc_feholdexcept_ppc (fenv_t *envp) +{ + __libc_feholdbits_ppc (envp, _FPU_MASK_NOT_RN_NI, 0LL); +} + +static __always_inline void +libc_feholdexcept_setround_ppc (fenv_t *envp, int r) +{ + __libc_feholdbits_ppc (envp, _FPU_MASK_NOT_RN_NI & _FPU_MASK_RN, r); +} + +static __always_inline void +libc_fesetround_ppc (int r) +{ + __fesetround_inline (r); +} + +static __always_inline int +libc_fetestexcept_ppc (int e) +{ + fenv_union_t u; + u.fenv = fegetenv_register (); + return u.l & e; +} + +static __always_inline void +libc_feholdsetround_ppc (fenv_t *e, int r) +{ + __libc_feholdbits_ppc (e, _FPU_MASK_TRAPS_RN, r); +} + +static __always_inline unsigned long long +__libc_femergeenv_ppc (const fenv_t *envp, unsigned long long old_mask, + unsigned long long new_mask) +{ + fenv_union_t old, new; + + new.fenv = *envp; + old.fenv = fegetenv_register (); + + /* Merge bits while masking unwanted bits from new and old env. */ + new.l = (old.l & old_mask) | (new.l & new_mask); + + /* If the old env has no enabled exceptions and the new env has any enabled + exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the + hardware into "precise mode" and may cause the FPU to run slower on some + hardware. */ + if ((old.l & _FPU_ALL_TRAPS) == 0 && (new.l & _FPU_ALL_TRAPS) != 0) + (void) __fe_nomask_env_priv (); + + /* If the old env had any enabled exceptions and the new env has no enabled + exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the + FPU to run faster because it always takes the default action and can not + generate SIGFPE. */ + if ((old.l & _FPU_ALL_TRAPS) != 0 && (new.l & _FPU_ALL_TRAPS) == 0) + (void) __fe_mask_env (); + + /* Atomically enable and raise (if appropriate) exceptions set in `new'. */ + fesetenv_register (new.fenv); + + return old.l; +} + +static __always_inline void +libc_fesetenv_ppc (const fenv_t *envp) +{ + /* Replace the entire environment. */ + __libc_femergeenv_ppc (envp, 0LL, -1LL); +} + +static __always_inline void +libc_feresetround_ppc (fenv_t *envp) +{ + __libc_femergeenv_ppc (envp, _FPU_MASK_TRAPS_RN, _FPU_MASK_FRAC_INEX_RET_CC); +} + +static __always_inline int +libc_feupdateenv_test_ppc (fenv_t *envp, int ex) +{ + return __libc_femergeenv_ppc (envp, _FPU_MASK_TRAPS_RN, + _FPU_MASK_FRAC_INEX_RET_CC) & ex; +} + +static __always_inline void +libc_feupdateenv_ppc (fenv_t *e) +{ + libc_feupdateenv_test_ppc (e, 0); +} + +#define libc_feholdexceptf libc_feholdexcept_ppc +#define libc_feholdexcept libc_feholdexcept_ppc +#define libc_feholdexcept_setroundf libc_feholdexcept_setround_ppc +#define libc_feholdexcept_setround libc_feholdexcept_setround_ppc +#define libc_fetestexceptf libc_fetestexcept_ppc +#define libc_fetestexcept libc_fetestexcept_ppc +#define libc_fesetroundf libc_fesetround_ppc +#define libc_fesetround libc_fesetround_ppc +#define libc_fesetenvf libc_fesetenv_ppc +#define libc_fesetenv libc_fesetenv_ppc +#define libc_feupdateenv_testf libc_feupdateenv_test_ppc +#define libc_feupdateenv_test libc_feupdateenv_test_ppc +#define libc_feupdateenvf libc_feupdateenv_ppc +#define libc_feupdateenv libc_feupdateenv_ppc +#define libc_feholdsetroundf libc_feholdsetround_ppc +#define libc_feholdsetround libc_feholdsetround_ppc +#define libc_feresetroundf libc_feresetround_ppc +#define libc_feresetround libc_feresetround_ppc + + +/* We have support for rounding mode context. */ +#define HAVE_RM_CTX 1 + +static __always_inline void +libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r) +{ + fenv_union_t old, new; + + old.fenv = fegetenv_register (); + + new.l = (old.l & _FPU_MASK_TRAPS_RN) | r; + + ctx->env = old.fenv; + if (__glibc_unlikely (new.l != old.l)) + { + if ((old.l & _FPU_ALL_TRAPS) != 0) + (void) __fe_mask_env (); + fesetenv_register (new.fenv); + ctx->updated_status = true; + } + else + ctx->updated_status = false; +} + +static __always_inline void +libc_fesetenv_ppc_ctx (struct rm_ctx *ctx) +{ + libc_fesetenv_ppc (&ctx->env); +} + +static __always_inline void +libc_feupdateenv_ppc_ctx (struct rm_ctx *ctx) +{ + if (__glibc_unlikely (ctx->updated_status)) + libc_feresetround_ppc (&ctx->env); +} + +static __always_inline void +libc_feresetround_ppc_ctx (struct rm_ctx *ctx) +{ + if (__glibc_unlikely (ctx->updated_status)) + libc_feresetround_ppc (&ctx->env); +} + +#define libc_fesetenv_ctx libc_fesetenv_ppc_ctx +#define libc_fesetenvf_ctx libc_fesetenv_ppc_ctx +#define libc_fesetenvl_ctx libc_fesetenv_ppc_ctx +#define libc_feholdsetround_ctx libc_feholdsetround_ppc_ctx +#define libc_feholdsetroundf_ctx libc_feholdsetround_ppc_ctx +#define libc_feholdsetroundl_ctx libc_feholdsetround_ppc_ctx +#define libc_feresetround_ctx libc_feresetround_ppc_ctx +#define libc_feresetroundf_ctx libc_feresetround_ppc_ctx +#define libc_feresetroundl_ctx libc_feresetround_ppc_ctx +#define libc_feupdateenv_ctx libc_feupdateenv_ppc_ctx +#define libc_feupdateenvf_ctx libc_feupdateenv_ppc_ctx +#define libc_feupdateenvl_ctx libc_feupdateenv_ppc_ctx + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fesetenv.c b/REORG.TODO/sysdeps/powerpc/fpu/fesetenv.c new file mode 100644 index 0000000000..7208ab455a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fesetenv.c @@ -0,0 +1,63 @@ +/* Install given floating-point environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <fpu_control.h> + +#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_XM | _FPU_MASK_IM) + +int +__fesetenv (const fenv_t *envp) +{ + fenv_union_t old, new; + + /* get the currently set exceptions. */ + new.fenv = *envp; + old.fenv = fegetenv_register (); + if (old.l == new.l) + return 0; + + /* If the old env has no enabled exceptions and the new env has any enabled + exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the + hardware into "precise mode" and may cause the FPU to run slower on some + hardware. */ + if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0) + (void) __fe_nomask_env_priv (); + + /* If the old env had any enabled exceptions and the new env has no enabled + exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the + FPU to run faster because it always takes the default action and can not + generate SIGFPE. */ + if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0) + (void)__fe_mask_env (); + + fesetenv_register (*envp); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetenv, __old_fesetenv) +compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fesetenv) +libm_hidden_ver (__fesetenv, fesetenv) +versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fesetexcept.c b/REORG.TODO/sysdeps/powerpc/fpu/fesetexcept.c new file mode 100644 index 0000000000..47ea8e499d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fesetexcept.c @@ -0,0 +1,42 @@ +/* Set given exception flags. PowerPC version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fesetexcept (int excepts) +{ + fenv_union_t u, n; + + u.fenv = fegetenv_register (); + n.l = (u.l + | (excepts & FPSCR_STICKY_BITS) + /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */ + | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT)) + & FE_INVALID_SOFTWARE)); + if (n.l != u.l) + { + fesetenv_register (n.fenv); + + /* Deal with FE_INVALID_SOFTWARE not being implemented on some chips. */ + if (excepts & FE_INVALID) + feraiseexcept (FE_INVALID); + } + + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fesetmode.c b/REORG.TODO/sysdeps/powerpc/fpu/fesetmode.c new file mode 100644 index 0000000000..794f762898 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fesetmode.c @@ -0,0 +1,49 @@ +/* Install given floating-point control modes. PowerPC version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <fpu_control.h> + +#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \ + | _FPU_MASK_XM | _FPU_MASK_IM) + +#define FPU_STATUS 0xbffff700ULL + +int +fesetmode (const femode_t *modep) +{ + fenv_union_t old, new; + + /* Logic regarding enabled exceptions as in fesetenv. */ + + new.fenv = *modep; + old.fenv = fegetenv_register (); + new.l = (new.l & ~FPU_STATUS) | (old.l & FPU_STATUS); + + if (old.l == new.l) + return 0; + + if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0) + (void) __fe_nomask_env_priv (); + + if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0) + (void) __fe_mask_env (); + + fesetenv_register (new.fenv); + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fesetround.c b/REORG.TODO/sysdeps/powerpc/fpu/fesetround.c new file mode 100644 index 0000000000..a041f1add9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fesetround.c @@ -0,0 +1,33 @@ +/* Set current rounding direction. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +#undef fesetround +int +__fesetround (int round) +{ + if ((unsigned int) round > 3) + return 1; + else + return __fesetround_inline(round); +} +libm_hidden_def (__fesetround) +weak_alias (__fesetround, fesetround) +libm_hidden_weak (fesetround) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/feupdateenv.c b/REORG.TODO/sysdeps/powerpc/fpu/feupdateenv.c new file mode 100644 index 0000000000..551cc1734d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/feupdateenv.c @@ -0,0 +1,68 @@ +/* Install given floating-point environment and raise exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <fpu_control.h> + +#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_XM | _FPU_MASK_IM) + +int +__feupdateenv (const fenv_t *envp) +{ + fenv_union_t old, new; + + /* Save the currently set exceptions. */ + new.fenv = *envp; + old.fenv = fegetenv_register (); + + /* Restore rounding mode and exception enable from *envp and merge + exceptions. Leave fraction rounded/inexact and FP result/CC bits + unchanged. */ + new.l = (old.l & 0xffffffff1fffff00LL) | (new.l & 0x1ff80fff); + + /* If the old env has no enabled exceptions and the new env has any enabled + exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put + the hardware into "precise mode" and may cause the FPU to run slower on + some hardware. */ + if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0) + (void) __fe_nomask_env_priv (); + + /* If the old env had any enabled exceptions and the new env has no enabled + exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the + FPU to run faster because it always takes the default action and can not + generate SIGFPE. */ + if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0) + (void)__fe_mask_env (); + + /* Atomically enable and raise (if appropriate) exceptions set in `new'. */ + fesetenv_register (new.fenv); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feupdateenv, __old_feupdateenv) +compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1); +#endif + +libm_hidden_def (__feupdateenv) +libm_hidden_ver (__feupdateenv, feupdateenv) +versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fgetexcptflg.c b/REORG.TODO/sysdeps/powerpc/fpu/fgetexcptflg.c new file mode 100644 index 0000000000..a11b8f3323 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fgetexcptflg.c @@ -0,0 +1,42 @@ +/* Store current representation for exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fegetexceptflag (fexcept_t *flagp, int excepts) +{ + fenv_union_t u; + + /* Get the current state. */ + u.fenv = fegetenv_register (); + + /* Return (all of) it. */ + *flagp = u.l & excepts & FE_ALL_EXCEPT; + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetexceptflag, __old_fegetexceptflag) +compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fix-fp-int-compare-invalid.h b/REORG.TODO/sysdeps/powerpc/fpu/fix-fp-int-compare-invalid.h new file mode 100644 index 0000000000..17ac1d2c83 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fix-fp-int-compare-invalid.h @@ -0,0 +1,28 @@ +/* Fix for missing "invalid" exceptions from floating-point + comparisons. PowerPC version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef FIX_FP_INT_COMPARE_INVALID_H +#define FIX_FP_INT_COMPARE_INVALID_H 1 + +/* As of GCC 5, comparisons use unordered comparison instructions when + they should use ordered comparisons + <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684>. */ +#define FIX_COMPARE_INVALID 1 + +#endif /* fix-fp-int-compare-invalid.h */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fraiseexcpt.c b/REORG.TODO/sysdeps/powerpc/fpu/fraiseexcpt.c new file mode 100644 index 0000000000..05f5cb6309 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fraiseexcpt.c @@ -0,0 +1,68 @@ +/* Raise given exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +#undef feraiseexcept +int +__feraiseexcept (int excepts) +{ + fenv_union_t u; + + /* Raise exceptions represented by EXCEPTS. It is the responsibility of + the OS to ensure that if multiple exceptions occur they are fed back + to this process in the proper way; this can happen in hardware, + anyway (in particular, inexact with overflow or underflow). */ + + /* Get the current state. */ + u.fenv = fegetenv_register (); + + /* Add the exceptions */ + u.l = (u.l + | (excepts & FPSCR_STICKY_BITS) + /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */ + | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT)) + & FE_INVALID_SOFTWARE)); + + /* Store the new status word (along with the rest of the environment), + triggering any appropriate exceptions. */ + fesetenv_register (u.fenv); + + if ((excepts & FE_INVALID)) + { + /* For some reason, some PowerPC chips (the 601, in particular) + don't have FE_INVALID_SOFTWARE implemented. Detect this + case and raise FE_INVALID_SNAN instead. */ + u.fenv = fegetenv_register (); + if ((u.l & FE_INVALID) == 0) + set_fpscr_bit (FPSCR_VXSNAN); + } + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feraiseexcept, __old_feraiseexcept) +compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1); +#endif + +libm_hidden_def (__feraiseexcept) +libm_hidden_ver (__feraiseexcept, feraiseexcept) +versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fsetexcptflg.c b/REORG.TODO/sysdeps/powerpc/fpu/fsetexcptflg.c new file mode 100644 index 0000000000..d5c0963688 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fsetexcptflg.c @@ -0,0 +1,63 @@ +/* Set floating-point environment exception handling. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fesetexceptflag (const fexcept_t *flagp, int excepts) +{ + fenv_union_t u, n; + fexcept_t flag; + + /* Get the current state. */ + u.fenv = fegetenv_register (); + + /* Ignore exceptions not listed in 'excepts'. */ + flag = *flagp & excepts; + + /* Replace the exception status */ + int excepts_mask = FPSCR_STICKY_BITS & excepts; + if ((excepts & FE_INVALID) != 0) + excepts_mask |= FE_ALL_INVALID; + n.l = ((u.l & ~excepts_mask) + | (flag & FPSCR_STICKY_BITS) + /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */ + | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT)) + & FE_INVALID_SOFTWARE)); + + /* Store the new status word (along with the rest of the environment). + This may cause floating-point exceptions if the restored state + requests it. */ + if (n.l != u.l) + fesetenv_register (n.fenv); + + /* Deal with FE_INVALID_SOFTWARE not being implemented on some chips. */ + if (flag & FE_INVALID) + feraiseexcept(FE_INVALID); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetexceptflag, __old_fesetexceptflag) +compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/ftestexcept.c b/REORG.TODO/sysdeps/powerpc/fpu/ftestexcept.c new file mode 100644 index 0000000000..8f2ecad509 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/ftestexcept.c @@ -0,0 +1,33 @@ +/* Test exception in current environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fetestexcept (int excepts) +{ + fenv_union_t u; + + /* Get the current state. */ + u.fenv = fegetenv_register (); + + /* The FE_INVALID bit is dealt with correctly by the hardware, so we can + just: */ + return u.l & excepts; +} +libm_hidden_def (fetestexcept) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/k_cosf.c b/REORG.TODO/sysdeps/powerpc/fpu/k_cosf.c new file mode 100644 index 0000000000..b9e31dc64d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/k_cosf.c @@ -0,0 +1,65 @@ +/* k_cosf.c -- float version of k_cos.c + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <fenv.h> +#include <math_private.h> + +static const float twom27 = 7.4505806e-09; +static const float dot3 = 3.0000001e-01; +static const float dot78125 = 7.8125000e-01; + +static const float one = 1.0000000000e+00; +static const float C1 = 4.1666667908e-02; +static const float C2 = -1.3888889225e-03; +static const float C3 = 2.4801587642e-05; +static const float C4 = -2.7557314297e-07; +static const float C5 = 2.0875723372e-09; +static const float C6 = -1.1359647598e-11; + +float +__kernel_cosf (float x, float y) +{ + float a, hz, z, r, qx; + float ix; + ix = __builtin_fabsf (x); + if (ix < twom27) + { /* |x| < 2**-27 */ + __feraiseexcept (FE_INEXACT); + return one; + } + z = x * x; + r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6))))); + if (ix < dot3) /* if |x| < 0.3 */ + return one - ((float) 0.5 * z - (z * r - x * y)); + else + { + if (ix > dot78125) + { /* x > 0.78125 */ + qx = (float) 0.28125; + } + else + { + qx = ix / 4.0; + } + hz = (float) 0.5 *z - qx; + a = one - qx; + return a - (hz - (z * r - x * y)); + } +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/k_rem_pio2f.c b/REORG.TODO/sysdeps/powerpc/fpu/k_rem_pio2f.c new file mode 100644 index 0000000000..04ed62055a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/k_rem_pio2f.c @@ -0,0 +1,273 @@ +/* k_rem_pio2f.c -- float version of e_rem_pio2.c + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#include <math_private.h> +#include "s_float_bitwise.h" + + +static const float two_over_pi[] = { + 1.62000000e+02, 2.49000000e+02, 1.31000000e+02, 1.10000000e+02, + 7.80000000e+01, 6.80000000e+01, 2.10000000e+01, 4.10000000e+01, + 2.52000000e+02, 3.90000000e+01, 8.70000000e+01, 2.09000000e+02, + 2.45000000e+02, 5.20000000e+01, 2.21000000e+02, 1.92000000e+02, + 2.19000000e+02, 9.80000000e+01, 1.49000000e+02, 1.53000000e+02, + 6.00000000e+01, 6.70000000e+01, 1.44000000e+02, 6.50000000e+01, + 2.54000000e+02, 8.10000000e+01, 9.90000000e+01, 1.71000000e+02, + 2.22000000e+02, 1.87000000e+02, 1.97000000e+02, 9.70000000e+01, + 1.83000000e+02, 3.60000000e+01, 1.10000000e+02, 5.80000000e+01, + 6.60000000e+01, 7.70000000e+01, 2.10000000e+02, 2.24000000e+02, + 6.00000000e+00, 7.30000000e+01, 4.60000000e+01, 2.34000000e+02, + 9.00000000e+00, 2.09000000e+02, 1.46000000e+02, 2.80000000e+01, + 2.54000000e+02, 2.90000000e+01, 2.35000000e+02, 2.80000000e+01, + 1.77000000e+02, 4.10000000e+01, 1.67000000e+02, 6.20000000e+01, + 2.32000000e+02, 1.30000000e+02, 5.30000000e+01, 2.45000000e+02, + 4.60000000e+01, 1.87000000e+02, 6.80000000e+01, 1.32000000e+02, + 2.33000000e+02, 1.56000000e+02, 1.12000000e+02, 3.80000000e+01, + 1.80000000e+02, 9.50000000e+01, 1.26000000e+02, 6.50000000e+01, + 5.70000000e+01, 1.45000000e+02, 2.14000000e+02, 5.70000000e+01, + 1.31000000e+02, 8.30000000e+01, 5.70000000e+01, 2.44000000e+02, + 1.56000000e+02, 1.32000000e+02, 9.50000000e+01, 1.39000000e+02, + 1.89000000e+02, 2.49000000e+02, 4.00000000e+01, 5.90000000e+01, + 3.10000000e+01, 2.48000000e+02, 1.51000000e+02, 2.55000000e+02, + 2.22000000e+02, 5.00000000e+00, 1.52000000e+02, 1.50000000e+01, + 2.39000000e+02, 4.70000000e+01, 1.70000000e+01, 1.39000000e+02, + 9.00000000e+01, 1.00000000e+01, 1.09000000e+02, 3.10000000e+01, + 1.09000000e+02, 5.40000000e+01, 1.26000000e+02, 2.07000000e+02, + 3.90000000e+01, 2.03000000e+02, 9.00000000e+00, 1.83000000e+02, + 7.90000000e+01, 7.00000000e+01, 6.30000000e+01, 1.02000000e+02, + 1.58000000e+02, 9.50000000e+01, 2.34000000e+02, 4.50000000e+01, + 1.17000000e+02, 3.90000000e+01, 1.86000000e+02, 1.99000000e+02, + 2.35000000e+02, 2.29000000e+02, 2.41000000e+02, 1.23000000e+02, + 6.10000000e+01, 7.00000000e+00, 5.70000000e+01, 2.47000000e+02, + 1.38000000e+02, 8.20000000e+01, 1.46000000e+02, 2.34000000e+02, + 1.07000000e+02, 2.51000000e+02, 9.50000000e+01, 1.77000000e+02, + 3.10000000e+01, 1.41000000e+02, 9.30000000e+01, 8.00000000e+00, + 8.60000000e+01, 3.00000000e+00, 4.80000000e+01, 7.00000000e+01, + 2.52000000e+02, 1.23000000e+02, 1.07000000e+02, 1.71000000e+02, + 2.40000000e+02, 2.07000000e+02, 1.88000000e+02, 3.20000000e+01, + 1.54000000e+02, 2.44000000e+02, 5.40000000e+01, 2.90000000e+01, + 1.69000000e+02, 2.27000000e+02, 1.45000000e+02, 9.70000000e+01, + 9.40000000e+01, 2.30000000e+02, 2.70000000e+01, 8.00000000e+00, + 1.01000000e+02, 1.53000000e+02, 1.33000000e+02, 9.50000000e+01, + 2.00000000e+01, 1.60000000e+02, 1.04000000e+02, 6.40000000e+01, + 1.41000000e+02, 2.55000000e+02, 2.16000000e+02, 1.28000000e+02, + 7.70000000e+01, 1.15000000e+02, 3.90000000e+01, 4.90000000e+01, + 6.00000000e+00, 6.00000000e+00, 2.10000000e+01, 8.60000000e+01, + 2.02000000e+02, 1.15000000e+02, 1.68000000e+02, 2.01000000e+02, + 9.60000000e+01, 2.26000000e+02, 1.23000000e+02, 1.92000000e+02, + 1.40000000e+02, 1.07000000e+02 +}; + + +static const float PIo2[] = { + 1.5703125000e+00, /* 0x3fc90000 */ + 4.5776367188e-04, /* 0x39f00000 */ + 2.5987625122e-05, /* 0x37da0000 */ + 7.5437128544e-08, /* 0x33a20000 */ + 6.0026650317e-11, /* 0x2e840000 */ + 7.3896444519e-13, /* 0x2b500000 */ + 5.3845816694e-15, /* 0x27c20000 */ + 5.6378512969e-18, /* 0x22d00000 */ + 8.3009228831e-20, /* 0x1fc40000 */ + 3.2756352257e-22, /* 0x1bc60000 */ + 6.3331015649e-25, /* 0x17440000 */ +}; + + +static const float zero = 0.0000000000e+00; +static const float one = 1.0000000000; +static const float twon8 = 3.9062500000e-03; +static const float two8 = 2.5600000000e+02; + + +int32_t +__fp_kernel_rem_pio2f (float *x, float *y, float e0, int32_t nx) +{ + int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih, exp; + float z, fw, f[20], fq[20], q[20]; + + /* initialize jk */ + jp = jk = 9; + + /* determine jx,jv,q0, note that 3>q0 */ + jx = nx - 1; + exp = __float_get_exp (e0) - 127; + jv = (exp - 3) / 8; + if (jv < 0) + jv = 0; + q0 = exp - 8 * (jv + 1); + + /* set up f[0] to f[jx+jk] where f[jx+jk] = two_over_pi[jv+jk] */ + j = jv - jx; + m = jx + jk; + for (i = 0; i <= m; i++, j++) + f[i] = (j < 0) ? zero : two_over_pi[j]; + + /* compute q[0],q[1],...q[jk] */ + for (i = 0; i <= jk; i++) + { + for (j = 0, fw = 0.0; j <= jx; j++) + fw += x[j] * f[jx + i - j]; + q[i] = fw; + } + + jz = jk; +recompute: + /* distill q[] into iq[] reversingly */ + for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) + { + fw = __truncf (twon8 * z); + iq[i] = (int32_t) (z - two8 * fw); + z = q[j - 1] + fw; + } + + /* compute n */ + z = __scalbnf (z, q0); /* actual value of z */ + z -= 8.0 * __floorf (z * 0.125); /* trim off integer >= 8 */ + n = (int32_t) z; + z -= __truncf (z); + ih = 0; + if (q0 > 0) + { /* need iq[jz-1] to determine n */ + i = (iq[jz - 1] >> (8 - q0)); + n += i; + iq[jz - 1] -= i << (8 - q0); + ih = iq[jz - 1] >> (7 - q0); + } + else if (q0 == 0) + ih = iq[jz - 1] >> 7; + else if (z >= 0.5) + ih = 2; + + if (ih > 0) + { /* q > 0.5 */ + n += 1; + carry = 0; + for (i = 0; i < jz; i++) + { /* compute 1-q */ + j = iq[i]; + if (carry == 0) + { + if (j != 0) + { + carry = 1; + iq[i] = 0x100 - j; + } + } + else + iq[i] = 0xff - j; + } + if (q0 > 0) + { /* rare case: chance is 1 in 12 */ + switch (q0) + { + case 1: + iq[jz - 1] &= 0x7f; + break; + case 2: + iq[jz - 1] &= 0x3f; + break; + } + } + if (ih == 2) + { + z = one - z; + if (carry != 0) + z -= __scalbnf (one, q0); + } + } + + /* check if recomputation is needed */ + if (z == zero) + { + j = 0; + for (i = jz - 1; i >= jk; i--) + j |= iq[i]; + if (j == 0) + { /* need recomputation */ + for (k = 1; iq[jk - k] == 0; k++); /* k = no. of terms needed */ + + for (i = jz + 1; i <= jz + k; i++) + { /* add q[jz+1] to q[jz+k] */ + f[jx + i] = two_over_pi[jv + i]; + for (j = 0, fw = 0.0; j <= jx; j++) + fw += x[j] * f[jx + i - j]; + q[i] = fw; + } + jz += k; + goto recompute; + } + } + + /* chop off zero terms */ + if (z == 0.0) + { + jz -= 1; + q0 -= 8; + while (iq[jz] == 0) + { + jz--; + q0 -= 8; + } + } + else + { /* break z into 8-bit if necessary */ + z = __scalbnf (z, -q0); + if (z >= two8) + { + fw = __truncf (twon8 * z); + iq[jz] = (int32_t) (z - two8 * fw); + jz += 1; + q0 += 8; + iq[jz] = (int32_t) fw; + } + else + iq[jz] = (int32_t) z; + } + + /* convert integer "bit" chunk to floating-point value */ + fw = __scalbnf (one, q0); + for (i = jz; i >= 0; i--) + { + q[i] = fw * (float) iq[i]; + fw *= twon8; + } + + /* compute PIo2[0,...,jp]*q[jz,...,0] */ + for (i = jz; i >= 0; i--) + { + for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) + fw += PIo2[k] * q[i + k]; + fq[jz - i] = fw; + } + + /* compress fq[] into y[] */ + fw = 0.0; + for (i = jz; i >= 0; i--) + fw += fq[i]; + y[0] = (ih == 0) ? fw : -fw; + fw = fq[0] - fw; + for (i = 1; i <= jz; i++) + fw += fq[i]; + y[1] = (ih == 0) ? fw : -fw; + + return n & 7; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/k_sinf.c b/REORG.TODO/sysdeps/powerpc/fpu/k_sinf.c new file mode 100644 index 0000000000..251633dc30 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/k_sinf.c @@ -0,0 +1,57 @@ +/* k_sinf.c -- float version of k_sin.c + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <float.h> +#include <math.h> +#include <fenv.h> +#include <math_private.h> + + +static const float twom27 = 7.4505806000e-09; +static const float half = 5.0000000000e-01; +static const float S1 = -1.6666667163e-01; +static const float S2 = 8.3333337680e-03; +static const float S3 = -1.9841270114e-04; +static const float S4 = 2.7557314297e-06; +static const float S5 = -2.5050759689e-08; +static const float S6 = 1.5896910177e-10; + + +float +__kernel_sinf (float x, float y, int iy) +{ + float z, r, v; + float ix; + ix = __builtin_fabsf (x); + if (ix < twom27) + { /* |x| < 2**-27 */ + if (ix < FLT_MIN && ix != 0.0f) + __feraiseexcept (FE_UNDERFLOW|FE_INEXACT); + else + __feraiseexcept (FE_INEXACT); + return x; + } + z = x * x; + v = z * x; + r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6))); + if (iy == 0) + return x + v * (S1 + z * r); + else + return x - ((z * (half * y - v * r) - y) - v * S1); +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/libm-test-ulps b/REORG.TODO/sysdeps/powerpc/fpu/libm-test-ulps new file mode 100644 index 0000000000..72eb2b1e5a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/libm-test-ulps @@ -0,0 +1,2342 @@ +# Begin of automatic generation + +# Maximal error of functions: +Function: "acos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "acos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "acos_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "acosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "acosh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "acosh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "acosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 4 + +Function: "asin": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "asin_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "asinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "asinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 7 +ldouble: 7 + +Function: "atan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan2": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "atan2_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "atan2_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "atan2_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "atan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "atan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "atanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "cabs": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_downward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_towardzero": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_upward": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacos": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "cacos": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacos_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "cacos_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "cacos_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "cacos_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "cacos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "cacos_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 13 +ldouble: 13 + +Function: Real part of "cacosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacosh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cacosh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "cacosh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "cacosh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "cacosh_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Real part of "cacosh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 12 +ldouble: 12 + +Function: Imaginary part of "cacosh_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 8 +ldouble: 8 + +Function: "carg": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "carg_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "carg_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "carg_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "casin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "casin": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casin_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "casin_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "casin_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "casin_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "casin_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "casin_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 13 +ldouble: 13 + +Function: Real part of "casinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "casinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "casinh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "casinh_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "casinh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "casinh_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Real part of "casinh_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 13 +ldouble: 13 + +Function: Imaginary part of "casinh_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Real part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "catan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "catan_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Real part of "catan_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "catan_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "catan_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catanh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "catanh_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Real part of "catanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catanh_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Real part of "catanh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "catanh_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: "cbrt": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: "cbrt_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "cbrt_upward": +double: 5 +float: 1 +idouble: 5 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccos_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccos_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccos_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccosh_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccosh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccosh_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccosh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cexp": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cexp": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cexp_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 11 +ldouble: 11 + +Function: Imaginary part of "cexp_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 11 +ldouble: 11 + +Function: Real part of "cexp_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 11 +ldouble: 11 + +Function: Imaginary part of "cexp_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 11 +ldouble: 11 + +Function: Real part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "clog": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog10": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog10_downward": +double: 6 +float: 6 +idouble: 6 +ifloat: 6 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "clog10_downward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: Real part of "clog10_towardzero": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 9 +ldouble: 9 + +Function: Imaginary part of "clog10_towardzero": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: Real part of "clog10_upward": +double: 8 +float: 5 +idouble: 8 +ifloat: 5 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "clog10_upward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: Real part of "clog_downward": +double: 7 +float: 5 +idouble: 7 +ifloat: 5 +ildouble: 11 +ldouble: 11 + +Function: Imaginary part of "clog_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Real part of "clog_towardzero": +double: 7 +float: 5 +idouble: 7 +ifloat: 5 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "clog_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 7 +ldouble: 7 + +Function: Real part of "clog_upward": +double: 8 +float: 5 +idouble: 8 +ifloat: 5 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "clog_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "cos": +float: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "cos_downward": +double: 1 +float: 4 +idouble: 1 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "cos_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "cos_upward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "cosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "cosh_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "cosh_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "cosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cpow": +double: 2 +float: 5 +idouble: 2 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "cpow": +float: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cpow_downward": +double: 4 +float: 8 +idouble: 4 +ifloat: 8 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "cpow_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cpow_towardzero": +double: 4 +float: 8 +idouble: 4 +ifloat: 8 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "cpow_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cpow_upward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cpow_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csin": +ildouble: 1 +ldouble: 1 + +Function: Real part of "csin_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csin_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csin_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csinh": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csinh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csinh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csinh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csinh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csinh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "csqrt_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "csqrt_downward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "csqrt_upward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 12 +ldouble: 12 + +Function: Imaginary part of "csqrt_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "ctan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ctan": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ctan_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ctan_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: Real part of "ctan_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ctan_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 13 +ldouble: 13 + +Function: Real part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: Real part of "ctanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ctanh": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctanh_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: Imaginary part of "ctanh_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ctanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 13 +ldouble: 13 + +Function: Imaginary part of "ctanh_towardzero": +double: 5 +float: 2 +idouble: 5 +ifloat: 2 +ildouble: 10 +ldouble: 10 + +Function: Real part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: "erf": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "erf_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erf_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erf_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "erfc": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "erfc_downward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 10 +ldouble: 10 + +Function: "erfc_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 9 +ldouble: 9 + +Function: "erfc_upward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: "exp": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp10": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 + +Function: "exp10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "exp10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "exp10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "exp2": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp2_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp2_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp_downward": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp_towardzero": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "expm1": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "expm1_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "expm1_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "expm1_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: "fma": +ildouble: 1 +ldouble: 1 + +Function: "fma_downward": +ildouble: 1 +ldouble: 1 + +Function: "fma_towardzero": +ildouble: 2 +ldouble: 2 + +Function: "fma_upward": +ildouble: 3 +ldouble: 3 + +Function: "fmod": +ildouble: 1 +ldouble: 1 + +Function: "fmod_downward": +ildouble: 1 +ldouble: 1 + +Function: "fmod_towardzero": +ildouble: 1 +ldouble: 1 + +Function: "fmod_upward": +ildouble: 1 +ldouble: 1 + +Function: "gamma": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "gamma_downward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 15 +ldouble: 15 + +Function: "gamma_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 16 +ldouble: 16 + +Function: "gamma_upward": +double: 4 +float: 5 +idouble: 4 +ifloat: 5 +ildouble: 11 +ldouble: 11 + +Function: "hypot": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_downward": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: "hypot_towardzero": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: "hypot_upward": +double: 1 +idouble: 1 +ildouble: 3 +ldouble: 3 + +Function: "j0": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "j0_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 11 +ldouble: 11 + +Function: "j0_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 8 +ldouble: 8 + +Function: "j0_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "j1": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "j1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "j1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "j1_upward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 6 +ldouble: 6 + +Function: "jn": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "jn_downward": +double: 4 +float: 5 +idouble: 4 +ifloat: 5 +ildouble: 7 +ldouble: 7 + +Function: "jn_towardzero": +double: 4 +float: 5 +idouble: 4 +ifloat: 5 +ildouble: 7 +ldouble: 7 + +Function: "jn_upward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "lgamma": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "lgamma_downward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 15 +ldouble: 15 + +Function: "lgamma_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 16 +ldouble: 16 + +Function: "lgamma_upward": +double: 4 +float: 5 +idouble: 4 +ifloat: 5 +ildouble: 11 +ldouble: 11 + +Function: "log": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "log10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log10_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 1 +ldouble: 1 + +Function: "log10_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log10_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log1p": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "log1p_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log1p_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "log1p_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "log2": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "log2_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: "log2_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "log2_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "log_downward": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log_towardzero": +float: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "nextafter_downward": +ildouble: 1 +ldouble: 1 + +Function: "nextafter_upward": +ildouble: 1 +ldouble: 1 + +Function: "pow": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "pow10": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 + +Function: "pow10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "pow10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "pow10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "pow_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "pow_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "pow_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sin": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sin_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "sin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "sin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "sincos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sincos_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "sincos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 7 +ldouble: 7 + +Function: "sincos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "sinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: "sinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "sinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: "sqrt": +ildouble: 1 +ldouble: 1 + +Function: "sqrt_downward": +ildouble: 1 +ldouble: 1 + +Function: "sqrt_towardzero": +ildouble: 1 +ldouble: 1 + +Function: "sqrt_upward": +ildouble: 1 +ldouble: 1 + +Function: "tan": +float: 3 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: "tan_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "tan_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: "tan_upward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "tanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "tanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "tanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "tanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: "tgamma": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "tgamma_downward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: "tgamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "tgamma_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "y0": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "y0_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 10 +ldouble: 10 + +Function: "y0_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: "y0_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 9 +ldouble: 9 + +Function: "y1": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "y1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "y1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 9 +ldouble: 9 + +Function: "y1_upward": +double: 5 +float: 2 +idouble: 5 +ifloat: 2 +ildouble: 9 +ldouble: 9 + +Function: "yn": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "yn_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 10 +ldouble: 10 + +Function: "yn_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: "yn_upward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 9 +ldouble: 9 + +# end of automatic generation diff --git a/REORG.TODO/sysdeps/powerpc/fpu/libm-test-ulps-name b/REORG.TODO/sysdeps/powerpc/fpu/libm-test-ulps-name new file mode 100644 index 0000000000..8c5f7fa2ab --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/libm-test-ulps-name @@ -0,0 +1 @@ +PowerPC diff --git a/REORG.TODO/sysdeps/powerpc/fpu/math_ldbl.h b/REORG.TODO/sysdeps/powerpc/fpu/math_ldbl.h new file mode 100644 index 0000000000..05f51217bf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/math_ldbl.h @@ -0,0 +1,55 @@ +/* Manipulation of the bit representation of 'long double' quantities. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_LDBL_H_PPC_ +#define _MATH_LDBL_H_PPC_ 1 + +/* GCC does not optimize the default ldbl_pack code to not spill register + in the stack. The following optimization tells gcc that pack/unpack + is really a nop. We use fr1/fr2 because those are the regs used to + pass/return a single long double arg. */ +static inline long double +ldbl_pack_ppc (double a, double aa) +{ + register long double x __asm__ ("fr1"); + register double xh __asm__ ("fr1"); + register double xl __asm__ ("fr2"); + xh = a; + xl = aa; + __asm__ ("" : "=f" (x) : "f" (xh), "f" (xl)); + return x; +} + +static inline void +ldbl_unpack_ppc (long double l, double *a, double *aa) +{ + register long double x __asm__ ("fr1"); + register double xh __asm__ ("fr1"); + register double xl __asm__ ("fr2"); + x = l; + __asm__ ("" : "=f" (xh), "=f" (xl) : "f" (x)); + *a = xh; + *aa = xl; +} + +#define ldbl_pack ldbl_pack_ppc +#define ldbl_unpack ldbl_unpack_ppc + +#include <sysdeps/ieee754/ldbl-128ibm/math_ldbl.h> + +#endif /* math_ldbl.h */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/math_private.h b/REORG.TODO/sysdeps/powerpc/fpu/math_private.h new file mode 100644 index 0000000000..3c71275392 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/math_private.h @@ -0,0 +1,142 @@ +/* Private inline math functions for powerpc. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _PPC_MATH_PRIVATE_H_ +#define _PPC_MATH_PRIVATE_H_ + +#include <sysdep.h> +#include <ldsodefs.h> +#include <dl-procinfo.h> +#include <fenv_private.h> +#include_next <math_private.h> + +extern double __slow_ieee754_sqrt (double); +extern __always_inline double +__ieee754_sqrt (double __x) +{ + double __z; + +#ifdef _ARCH_PPCSQ + asm ("fsqrt %0,%1" : "=f" (__z) : "f" (__x)); +#else + __z = __slow_ieee754_sqrt(__x); +#endif + + return __z; +} + +extern float __slow_ieee754_sqrtf (float); +extern __always_inline float +__ieee754_sqrtf (float __x) +{ + float __z; + +#ifdef _ARCH_PPCSQ + asm ("fsqrts %0,%1" : "=f" (__z) : "f" (__x)); +#else + __z = __slow_ieee754_sqrtf(__x); +#endif + + return __z; +} + +#if defined _ARCH_PWR5X + +# ifndef __round +# define __round(x) \ + ({ double __z; \ + __asm __volatile ( \ + " frin %0,%1\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif +# ifndef __roundf +# define __roundf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " frin %0,%1\n" \ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif + +# ifndef __trunc +# define __trunc(x) \ + ({ double __z; \ + __asm __volatile ( \ + " friz %0,%1\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif +# ifndef __truncf +# define __truncf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " friz %0,%1\n" \ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif + +# ifndef __ceil +# define __ceil(x) \ + ({ double __z; \ + __asm __volatile ( \ + " frip %0,%1\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif +# ifndef __ceilf +# define __ceilf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " frip %0,%1\n" \ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif + +# ifndef __floor +# define __floor(x) \ + ({ double __z; \ + __asm __volatile ( \ + " frim %0,%1\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif +# ifndef __floorf +# define __floorf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " frim %0,%1\n" \ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif + +#endif /* defined _ARCH_PWR5X */ + +#endif /* _PPC_MATH_PRIVATE_H_ */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_cosf.c b/REORG.TODO/sysdeps/powerpc/fpu/s_cosf.c new file mode 100644 index 0000000000..772b138ac3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_cosf.c @@ -0,0 +1,69 @@ +/* s_cosf.c -- float version of s_cos.c. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <math.h> +#include <math_private.h> + +static const float pio4 = 7.8539801e-1; + +float +__cosf (float x) +{ + float y[2], z = 0.0; + float ix; + int32_t n; + + ix = __builtin_fabsf (x); + + /* |x| ~< pi/4 */ + if (ix <= pio4) + { + return __kernel_cosf (x, z); + /* cos(Inf or NaN) is NaN */ + } + else if (isnanf (ix)) + { + return x - x; + } + else if (isinff (ix)) + { + __set_errno (EDOM); + return x - x; + } + + /* argument reduction needed */ + else + { + n = __ieee754_rem_pio2f (x, y); + switch (n & 3) + { + case 0: + return __kernel_cosf (y[0], y[1]); + case 1: + return -__kernel_sinf (y[0], y[1], 1); + case 2: + return -__kernel_cosf (y[0], y[1]); + default: + return __kernel_sinf (y[0], y[1], 1); + } + } +} + +weak_alias (__cosf, cosf) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_fabs.S b/REORG.TODO/sysdeps/powerpc/fpu/s_fabs.S new file mode 100644 index 0000000000..87dc82eb28 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_fabs.S @@ -0,0 +1,36 @@ +/* Floating-point absolute value. PowerPC version. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(__fabs) +/* double [f1] fabs (double [f1] x); */ + fabs fp1,fp1 + blr +END(__fabs) + +weak_alias (__fabs,fabs) + +/* It turns out that it's safe to use this code even for single-precision. */ +strong_alias(__fabs,__fabsf) +weak_alias (__fabs,fabsf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__fabs,__fabsl) +weak_alias (__fabs,fabsl) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_fabsf.S b/REORG.TODO/sysdeps/powerpc/fpu/s_fabsf.S new file mode 100644 index 0000000000..877c710ce8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_fabsf.S @@ -0,0 +1 @@ +/* __fabsf is in s_fabs.S */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_float_bitwise.h b/REORG.TODO/sysdeps/powerpc/fpu/s_float_bitwise.h new file mode 100644 index 0000000000..8e63fb253b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_float_bitwise.h @@ -0,0 +1,115 @@ +/* Bitwise manipulation over float. Function prototypes. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FLOAT_BITWISE_ +#define _FLOAT_BITWISE_ 1 + +#include <math_private.h> + +/* Returns (int)(num & 0x7FFFFFF0 == value) */ +static inline int +__float_and_test28 (float num, float value) +{ + float ret; +#ifdef _ARCH_PWR7 + union { + int i; + float f; + } mask = { .i = 0x7ffffff0 }; + __asm__ ( + /* the 'f' constraint is used on mask because we just need + * to compare floats, not full vector */ + "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f) + ); +#else + int32_t inum; + GET_FLOAT_WORD(inum, num); + inum = (inum & 0x7ffffff0); + SET_FLOAT_WORD(ret, inum); +#endif + return (ret == value); +} + +/* Returns (int)(num & 0x7FFFFF00 == value) */ +static inline int +__float_and_test24 (float num, float value) +{ + float ret; +#ifdef _ARCH_PWR7 + union { + int i; + float f; + } mask = { .i = 0x7fffff00 }; + __asm__ ( + "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f) + ); +#else + int32_t inum; + GET_FLOAT_WORD(inum, num); + inum = (inum & 0x7fffff00); + SET_FLOAT_WORD(ret, inum); +#endif + return (ret == value); +} + +/* Returns (float)(num & 0x7F800000) */ +static inline float +__float_and8 (float num) +{ + float ret; +#ifdef _ARCH_PWR7 + union { + int i; + float f; + } mask = { .i = 0x7f800000 }; + __asm__ ( + "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f) + ); +#else + int32_t inum; + GET_FLOAT_WORD(inum, num); + inum = (inum & 0x7f800000); + SET_FLOAT_WORD(ret, inum); +#endif + return ret; +} + +/* Returns ((int32_t)(num & 0x7F800000) >> 23) */ +static inline int32_t +__float_get_exp (float num) +{ + int32_t inum; +#ifdef _ARCH_PWR7 + float ret; + union { + int i; + float f; + } mask = { .i = 0x7f800000 }; + __asm__ ( + "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f) + ); + GET_FLOAT_WORD(inum, ret); +#else + GET_FLOAT_WORD(inum, num); + inum = inum & 0x7f800000; +#endif + return inum >> 23; +} + +#endif /* s_float_bitwise.h */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_fma.S b/REORG.TODO/sysdeps/powerpc/fpu/s_fma.S new file mode 100644 index 0000000000..e101f374bf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_fma.S @@ -0,0 +1,32 @@ +/* Compute x * y + z as ternary operation. PowerPC version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(__fma) +/* double [f1] fma (double [f1] x, double [f2] y, double [f3] z); */ + fmadd fp1,fp1,fp2,fp3 + blr +END(__fma) + +weak_alias (__fma,fma) + +#ifdef NO_LONG_DOUBLE +weak_alias (__fma,__fmal) +weak_alias (__fma,fmal) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_fmaf.S b/REORG.TODO/sysdeps/powerpc/fpu/s_fmaf.S new file mode 100644 index 0000000000..49ea298707 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_fmaf.S @@ -0,0 +1,27 @@ +/* Compute x * y + z as ternary operation. PowerPC version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(__fmaf) +/* float [f1] fmaf (float [f1] x, float [f2] y, float [f3] z); */ + fmadds fp1,fp1,fp2,fp3 + blr +END(__fmaf) + +weak_alias (__fmaf,fmaf) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_isnan.c b/REORG.TODO/sysdeps/powerpc/fpu/s_isnan.c new file mode 100644 index 0000000000..f75391fa80 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_isnan.c @@ -0,0 +1,62 @@ +/* Return 1 if argument is a NaN, else 0. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Ugly kludge to avoid declarations. */ +#define __isnanf __Xisnanf +#define isnanf Xisnanf +#define __GI___isnanf __GI___Xisnanf + +#include <math.h> +#include <math_ldbl_opt.h> +#include <fenv_libc.h> + +#undef __isnanf +#undef isnanf +#undef __GI___isnanf + + +/* The hidden_proto in include/math.h was obscured by the macro hackery. */ +__typeof (__isnan) __isnanf; +hidden_proto (__isnanf) + + +int +__isnan (double x) +{ + fenv_t savedstate; + int result; + savedstate = fegetenv_register (); + reset_fpscr_bit (FPSCR_VE); + result = !(x == x); + fesetenv_register (savedstate); + return result; +} +hidden_def (__isnan) +weak_alias (__isnan, isnan) + + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_isnanf.S b/REORG.TODO/sysdeps/powerpc/fpu/s_isnanf.S new file mode 100644 index 0000000000..fc22f678a1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_isnanf.S @@ -0,0 +1 @@ +/* __isnanf is in s_isnan.c */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_lrintf.S b/REORG.TODO/sysdeps/powerpc/fpu/s_lrintf.S new file mode 100644 index 0000000000..e24766535f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_lrintf.S @@ -0,0 +1 @@ +/* __lrintf is in s_lrint.c */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_rint.c b/REORG.TODO/sysdeps/powerpc/fpu/s_rint.c new file mode 100644 index 0000000000..a96140b2c9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_rint.c @@ -0,0 +1,46 @@ +/* Round a 64-bit floating point value to the nearest integer. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +double +__rint (double x) +{ + static const float TWO52 = 4503599627370496.0; + + if (fabs (x) < TWO52) + { + if (x > 0.0) + { + x += TWO52; + x -= TWO52; + } + else if (x < 0.0) + { + x = TWO52 - x; + x = -(x - TWO52); + } + } + + return x; +} +weak_alias (__rint, rint) +#ifdef NO_LONG_DOUBLE +strong_alias (__rint, __rintl) +weak_alias (__rint, rintl) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_rintf.c b/REORG.TODO/sysdeps/powerpc/fpu/s_rintf.c new file mode 100644 index 0000000000..6b16c7bec4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_rintf.c @@ -0,0 +1,42 @@ +/* Round a 32-bit floating point value to the nearest integer. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +float +__rintf (float x) +{ + static const float TWO23 = 8388608.0; + + if (fabsf (x) < TWO23) + { + if (x > 0.0) + { + x += TWO23; + x -= TWO23; + } + else if (x < 0.0) + { + x = TWO23 - x; + x = -(x - TWO23); + } + } + + return x; +} +weak_alias (__rintf, rintf) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_sinf.c b/REORG.TODO/sysdeps/powerpc/fpu/s_sinf.c new file mode 100644 index 0000000000..54a428e68a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_sinf.c @@ -0,0 +1,69 @@ +/* s_sinf.c -- float version of s_sin.c. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <math.h> +#include <math_private.h> + +static const float pio4 = 7.8539801e-1; + +float +__sinf (float x) +{ + float y[2], z = 0.0; + float ix; + int32_t n; + + ix = __builtin_fabsf (x); + + /* |x| ~< pi/4 */ + if (ix <= pio4) + { + return __kernel_sinf (x, z, 0); + /* sin(Inf or NaN) is NaN */ + } + else if (isnanf (ix)) + { + return x - x; + } + else if (isinff (ix)) + { + __set_errno (EDOM); + return x - x; + } + + /* argument reduction needed */ + else + { + n = __ieee754_rem_pio2f (x, y); + switch (n & 3) + { + case 0: + return __kernel_sinf (y[0], y[1], 1); + case 1: + return __kernel_cosf (y[0], y[1]); + case 2: + return -__kernel_sinf (y[0], y[1], 1); + default: + return -__kernel_cosf (y[0], y[1]); + } + } +} + +weak_alias (__sinf, sinf) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/t_sqrt.c b/REORG.TODO/sysdeps/powerpc/fpu/t_sqrt.c new file mode 100644 index 0000000000..9ed7436ae6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/t_sqrt.c @@ -0,0 +1,144 @@ +const float __t_sqrt[1024] = { +0.7078,0.7064, 0.7092,0.7050, 0.7106,0.7037, 0.7119,0.7023, 0.7133,0.7010, +0.7147,0.6996, 0.7160,0.6983, 0.7174,0.6970, 0.7187,0.6957, 0.7201,0.6943, +0.7215,0.6930, 0.7228,0.6917, 0.7242,0.6905, 0.7255,0.6892, 0.7269,0.6879, +0.7282,0.6866, 0.7295,0.6854, 0.7309,0.6841, 0.7322,0.6829, 0.7335,0.6816, +0.7349,0.6804, 0.7362,0.6792, 0.7375,0.6779, 0.7388,0.6767, 0.7402,0.6755, +0.7415,0.6743, 0.7428,0.6731, 0.7441,0.6719, 0.7454,0.6708, 0.7467,0.6696, +0.7480,0.6684, 0.7493,0.6672, 0.7507,0.6661, 0.7520,0.6649, 0.7532,0.6638, +0.7545,0.6627, 0.7558,0.6615, 0.7571,0.6604, 0.7584,0.6593, 0.7597,0.6582, +0.7610,0.6570, 0.7623,0.6559, 0.7635,0.6548, 0.7648,0.6537, 0.7661,0.6527, +0.7674,0.6516, 0.7686,0.6505, 0.7699,0.6494, 0.7712,0.6484, 0.7725,0.6473, +0.7737,0.6462, 0.7750,0.6452, 0.7762,0.6441, 0.7775,0.6431, 0.7787,0.6421, +0.7800,0.6410, 0.7812,0.6400, 0.7825,0.6390, 0.7837,0.6380, 0.7850,0.6370, +0.7862,0.6359, 0.7875,0.6349, 0.7887,0.6339, 0.7900,0.6330, 0.7912,0.6320, +0.7924,0.6310, 0.7937,0.6300, 0.7949,0.6290, 0.7961,0.6281, 0.7973,0.6271, +0.7986,0.6261, 0.7998,0.6252, 0.8010,0.6242, 0.8022,0.6233, 0.8034,0.6223, +0.8046,0.6214, 0.8059,0.6205, 0.8071,0.6195, 0.8083,0.6186, 0.8095,0.6177, +0.8107,0.6168, 0.8119,0.6158, 0.8131,0.6149, 0.8143,0.6140, 0.8155,0.6131, +0.8167,0.6122, 0.8179,0.6113, 0.8191,0.6104, 0.8203,0.6096, 0.8215,0.6087, +0.8227,0.6078, 0.8238,0.6069, 0.8250,0.6060, 0.8262,0.6052, 0.8274,0.6043, +0.8286,0.6035, 0.8297,0.6026, 0.8309,0.6017, 0.8321,0.6009, 0.8333,0.6000, +0.8344,0.5992, 0.8356,0.5984, 0.8368,0.5975, 0.8379,0.5967, 0.8391,0.5959, +0.8403,0.5950, 0.8414,0.5942, 0.8426,0.5934, 0.8437,0.5926, 0.8449,0.5918, +0.8461,0.5910, 0.8472,0.5902, 0.8484,0.5894, 0.8495,0.5886, 0.8507,0.5878, +0.8518,0.5870, 0.8530,0.5862, 0.8541,0.5854, 0.8552,0.5846, 0.8564,0.5838, +0.8575,0.5831, 0.8587,0.5823, 0.8598,0.5815, 0.8609,0.5808, 0.8621,0.5800, +0.8632,0.5792, 0.8643,0.5785, 0.8655,0.5777, 0.8666,0.5770, 0.8677,0.5762, +0.8688,0.5755, 0.8700,0.5747, 0.8711,0.5740, 0.8722,0.5733, 0.8733,0.5725, +0.8744,0.5718, 0.8756,0.5711, 0.8767,0.5703, 0.8778,0.5696, 0.8789,0.5689, +0.8800,0.5682, 0.8811,0.5675, 0.8822,0.5667, 0.8833,0.5660, 0.8844,0.5653, +0.8855,0.5646, 0.8866,0.5639, 0.8877,0.5632, 0.8888,0.5625, 0.8899,0.5618, +0.8910,0.5611, 0.8921,0.5605, 0.8932,0.5598, 0.8943,0.5591, 0.8954,0.5584, +0.8965,0.5577, 0.8976,0.5570, 0.8987,0.5564, 0.8998,0.5557, 0.9008,0.5550, +0.9019,0.5544, 0.9030,0.5537, 0.9041,0.5530, 0.9052,0.5524, 0.9062,0.5517, +0.9073,0.5511, 0.9084,0.5504, 0.9095,0.5498, 0.9105,0.5491, 0.9116,0.5485, +0.9127,0.5478, 0.9138,0.5472, 0.9148,0.5465, 0.9159,0.5459, 0.9170,0.5453, +0.9180,0.5446, 0.9191,0.5440, 0.9202,0.5434, 0.9212,0.5428, 0.9223,0.5421, +0.9233,0.5415, 0.9244,0.5409, 0.9254,0.5403, 0.9265,0.5397, 0.9276,0.5391, +0.9286,0.5384, 0.9297,0.5378, 0.9307,0.5372, 0.9318,0.5366, 0.9328,0.5360, +0.9338,0.5354, 0.9349,0.5348, 0.9359,0.5342, 0.9370,0.5336, 0.9380,0.5330, +0.9391,0.5324, 0.9401,0.5319, 0.9411,0.5313, 0.9422,0.5307, 0.9432,0.5301, +0.9442,0.5295, 0.9453,0.5289, 0.9463,0.5284, 0.9473,0.5278, 0.9484,0.5272, +0.9494,0.5266, 0.9504,0.5261, 0.9515,0.5255, 0.9525,0.5249, 0.9535,0.5244, +0.9545,0.5238, 0.9556,0.5233, 0.9566,0.5227, 0.9576,0.5221, 0.9586,0.5216, +0.9596,0.5210, 0.9607,0.5205, 0.9617,0.5199, 0.9627,0.5194, 0.9637,0.5188, +0.9647,0.5183, 0.9657,0.5177, 0.9667,0.5172, 0.9677,0.5167, 0.9687,0.5161, +0.9698,0.5156, 0.9708,0.5151, 0.9718,0.5145, 0.9728,0.5140, 0.9738,0.5135, +0.9748,0.5129, 0.9758,0.5124, 0.9768,0.5119, 0.9778,0.5114, 0.9788,0.5108, +0.9798,0.5103, 0.9808,0.5098, 0.9818,0.5093, 0.9828,0.5088, 0.9838,0.5083, +0.9847,0.5077, 0.9857,0.5072, 0.9867,0.5067, 0.9877,0.5062, 0.9887,0.5057, +0.9897,0.5052, 0.9907,0.5047, 0.9917,0.5042, 0.9926,0.5037, 0.9936,0.5032, +0.9946,0.5027, 0.9956,0.5022, 0.9966,0.5017, 0.9976,0.5012, 0.9985,0.5007, +0.9995,0.5002, +1.0010,0.4995, 1.0029,0.4985, 1.0049,0.4976, 1.0068,0.4966, 1.0088,0.4957, +1.0107,0.4947, 1.0126,0.4938, 1.0145,0.4928, 1.0165,0.4919, 1.0184,0.4910, +1.0203,0.4901, 1.0222,0.4891, 1.0241,0.4882, 1.0260,0.4873, 1.0279,0.4864, +1.0298,0.4855, 1.0317,0.4846, 1.0336,0.4837, 1.0355,0.4829, 1.0374,0.4820, +1.0393,0.4811, 1.0411,0.4802, 1.0430,0.4794, 1.0449,0.4785, 1.0468,0.4777, +1.0486,0.4768, 1.0505,0.4760, 1.0523,0.4751, 1.0542,0.4743, 1.0560,0.4735, +1.0579,0.4726, 1.0597,0.4718, 1.0616,0.4710, 1.0634,0.4702, 1.0653,0.4694, +1.0671,0.4686, 1.0689,0.4678, 1.0707,0.4670, 1.0726,0.4662, 1.0744,0.4654, +1.0762,0.4646, 1.0780,0.4638, 1.0798,0.4630, 1.0816,0.4623, 1.0834,0.4615, +1.0852,0.4607, 1.0870,0.4600, 1.0888,0.4592, 1.0906,0.4585, 1.0924,0.4577, +1.0942,0.4570, 1.0960,0.4562, 1.0978,0.4555, 1.0995,0.4547, 1.1013,0.4540, +1.1031,0.4533, 1.1049,0.4525, 1.1066,0.4518, 1.1084,0.4511, 1.1101,0.4504, +1.1119,0.4497, 1.1137,0.4490, 1.1154,0.4483, 1.1172,0.4476, 1.1189,0.4469, +1.1207,0.4462, 1.1224,0.4455, 1.1241,0.4448, 1.1259,0.4441, 1.1276,0.4434, +1.1293,0.4427, 1.1311,0.4421, 1.1328,0.4414, 1.1345,0.4407, 1.1362,0.4401, +1.1379,0.4394, 1.1397,0.4387, 1.1414,0.4381, 1.1431,0.4374, 1.1448,0.4368, +1.1465,0.4361, 1.1482,0.4355, 1.1499,0.4348, 1.1516,0.4342, 1.1533,0.4335, +1.1550,0.4329, 1.1567,0.4323, 1.1584,0.4316, 1.1600,0.4310, 1.1617,0.4304, +1.1634,0.4298, 1.1651,0.4292, 1.1668,0.4285, 1.1684,0.4279, 1.1701,0.4273, +1.1718,0.4267, 1.1734,0.4261, 1.1751,0.4255, 1.1768,0.4249, 1.1784,0.4243, +1.1801,0.4237, 1.1817,0.4231, 1.1834,0.4225, 1.1850,0.4219, 1.1867,0.4213, +1.1883,0.4208, 1.1900,0.4202, 1.1916,0.4196, 1.1932,0.4190, 1.1949,0.4185, +1.1965,0.4179, 1.1981,0.4173, 1.1998,0.4167, 1.2014,0.4162, 1.2030,0.4156, +1.2046,0.4151, 1.2063,0.4145, 1.2079,0.4139, 1.2095,0.4134, 1.2111,0.4128, +1.2127,0.4123, 1.2143,0.4117, 1.2159,0.4112, 1.2175,0.4107, 1.2192,0.4101, +1.2208,0.4096, 1.2224,0.4090, 1.2239,0.4085, 1.2255,0.4080, 1.2271,0.4075, +1.2287,0.4069, 1.2303,0.4064, 1.2319,0.4059, 1.2335,0.4054, 1.2351,0.4048, +1.2366,0.4043, 1.2382,0.4038, 1.2398,0.4033, 1.2414,0.4028, 1.2429,0.4023, +1.2445,0.4018, 1.2461,0.4013, 1.2477,0.4008, 1.2492,0.4003, 1.2508,0.3998, +1.2523,0.3993, 1.2539,0.3988, 1.2555,0.3983, 1.2570,0.3978, 1.2586,0.3973, +1.2601,0.3968, 1.2617,0.3963, 1.2632,0.3958, 1.2648,0.3953, 1.2663,0.3949, +1.2678,0.3944, 1.2694,0.3939, 1.2709,0.3934, 1.2725,0.3929, 1.2740,0.3925, +1.2755,0.3920, 1.2771,0.3915, 1.2786,0.3911, 1.2801,0.3906, 1.2816,0.3901, +1.2832,0.3897, 1.2847,0.3892, 1.2862,0.3887, 1.2877,0.3883, 1.2892,0.3878, +1.2907,0.3874, 1.2923,0.3869, 1.2938,0.3865, 1.2953,0.3860, 1.2968,0.3856, +1.2983,0.3851, 1.2998,0.3847, 1.3013,0.3842, 1.3028,0.3838, 1.3043,0.3834, +1.3058,0.3829, 1.3073,0.3825, 1.3088,0.3820, 1.3103,0.3816, 1.3118,0.3812, +1.3132,0.3807, 1.3147,0.3803, 1.3162,0.3799, 1.3177,0.3794, 1.3192,0.3790, +1.3207,0.3786, 1.3221,0.3782, 1.3236,0.3778, 1.3251,0.3773, 1.3266,0.3769, +1.3280,0.3765, 1.3295,0.3761, 1.3310,0.3757, 1.3324,0.3753, 1.3339,0.3748, +1.3354,0.3744, 1.3368,0.3740, 1.3383,0.3736, 1.3397,0.3732, 1.3412,0.3728, +1.3427,0.3724, 1.3441,0.3720, 1.3456,0.3716, 1.3470,0.3712, 1.3485,0.3708, +1.3499,0.3704, 1.3514,0.3700, 1.3528,0.3696, 1.3542,0.3692, 1.3557,0.3688, +1.3571,0.3684, 1.3586,0.3680, 1.3600,0.3676, 1.3614,0.3673, 1.3629,0.3669, +1.3643,0.3665, 1.3657,0.3661, 1.3672,0.3657, 1.3686,0.3653, 1.3700,0.3650, +1.3714,0.3646, 1.3729,0.3642, 1.3743,0.3638, 1.3757,0.3634, 1.3771,0.3631, +1.3785,0.3627, 1.3800,0.3623, 1.3814,0.3620, 1.3828,0.3616, 1.3842,0.3612, +1.3856,0.3609, 1.3870,0.3605, 1.3884,0.3601, 1.3898,0.3598, 1.3912,0.3594, +1.3926,0.3590, 1.3940,0.3587, 1.3954,0.3583, 1.3968,0.3580, 1.3982,0.3576, +1.3996,0.3572, 1.4010,0.3569, 1.4024,0.3565, 1.4038,0.3562, 1.4052,0.3558, +1.4066,0.3555, 1.4080,0.3551, 1.4094,0.3548, 1.4108,0.3544, 1.4121,0.3541, +1.4135,0.3537 +}; + + +/* Generated by: */ +#if 0 +#include <math.h> +#include <stdio.h> +#include <assert.h> + +int +main(int argc, char **argv) +{ + int i, j; + + printf ("const float __t_sqrt[1024] = {"); + for (i = 0; i < 2; i++) + { + putchar('\n'); + for (j = 0; j < 256; j++) + { + double mval = j/512.0 + 0.5; + double eval = i==0 ? 1.0 : 2.0; + double ls = sqrt(mval*eval); + double hs = sqrt((mval+1/512.0)*eval); + double as = (ls+hs)*0.5; + double lx = 1/(2.0*ls); + double hx = 1/(2.0*hs); + double ax = (lx+hx)*0.5; + + printf("%.4f,%.4f%s",as,ax, + i*j==255 ? "\n" : j % 5 == 4 ? ",\n" : ", "); + assert((hs-ls)/as < 1/256.0); + assert((hx-lx)/ax < 1/256.0); + } + } + printf ("};\n"); + return 0; +} +#endif /* 0 */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c b/REORG.TODO/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c new file mode 100644 index 0000000000..4e3f90d4d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c @@ -0,0 +1,370 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ryan S. Arnold <rsa@us.ibm.com> + Sean Curry <spcurry@us.ibm.com> + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <unistd.h> +#include <malloc.h> +#include <link.h> +#include <elf.h> +#include <fpu_control.h> +#include <sys/auxv.h> + +static ucontext_t ctx[3]; + + +volatile int global; + + +static int back_in_main; + + +volatile static ElfW(auxv_t) *auxv = NULL; + +ElfW(Addr) query_auxv(int type) +{ + FILE *auxv_f; + ElfW(auxv_t) auxv_struct; + ElfW(auxv_t) *auxv_temp; + int i = 0; + + /* if the /proc/self/auxv file has not been manually copied into the heap + yet, then do it */ + + if(auxv == NULL) + { + auxv_f = fopen("/proc/self/auxv", "r"); + + if(auxv_f == 0) + { + perror("Error opening file for reading"); + return 0; + } + auxv = (ElfW(auxv_t) *)malloc(getpagesize()); + + do + { + fread(&auxv_struct, sizeof(ElfW(auxv_t)), 1, auxv_f); + auxv[i] = auxv_struct; + i++; + } while(auxv_struct.a_type != AT_NULL); + } + + auxv_temp = (ElfW(auxv_t) *)auxv; + i = 0; + do + { + if(auxv_temp[i].a_type == type) + { + return auxv_temp[i].a_un.a_val; + } + i++; + } while (auxv_temp[i].a_type != AT_NULL); + + return 0; +} + +typedef unsigned int di_fpscr_t __attribute__ ((__mode__ (__DI__))); +typedef unsigned int si_fpscr_t __attribute__ ((__mode__ (__SI__))); + +#define _FPSCR_RESERVED 0xfffffff8ffffff04ULL + +#define _FPSCR_TEST0_DRN 0x0000000400000000ULL +#define _FPSCR_TEST0_RN 0x0000000000000003ULL + +#define _FPSCR_TEST1_DRN 0x0000000300000000ULL +#define _FPSCR_TEST1_RN 0x0000000000000002ULL + +/* Macros for accessing the hardware control word on Power6[x]. */ +#define _GET_DI_FPSCR(__fpscr) \ + ({union { double d; di_fpscr_t fpscr; } u; \ + register double fr; \ + __asm__ ("mffs %0" : "=f" (fr)); \ + u.d = fr; \ + (__fpscr) = u.fpscr; \ + u.fpscr; \ + }) + +/* We make sure to zero fp after we use it in order to prevent stale data + in an fp register from making a test-case pass erroneously. */ +# define _SET_DI_FPSCR(__fpscr) \ + { union { double d; di_fpscr_t fpscr; } u; \ + register double fr; \ + u.fpscr = __fpscr; \ + fr = u.d; \ + /* Set the entire 64-bit FPSCR. */ \ + __asm__ (".machine push; " \ + ".machine \"power6\"; " \ + "mtfsf 255,%0,1,0; " \ + ".machine pop" : : "f" (fr)); \ + fr = 0.0; \ + } + +# define _GET_SI_FPSCR(__fpscr) \ + ({union { double d; di_fpscr_t fpscr; } u; \ + register double fr; \ + __asm__ ("mffs %0" : "=f" (fr)); \ + u.d = fr; \ + (__fpscr) = (si_fpscr_t) u.fpscr; \ + (si_fpscr_t) u.fpscr; \ + }) + +/* We make sure to zero fp after we use it in order to prevent stale data + in an fp register from making a test-case pass erroneously. */ +# define _SET_SI_FPSCR(__fpscr) \ + { union { double d; di_fpscr_t fpscr; } u; \ + register double fr; \ + /* More-or-less arbitrary; this is a QNaN. */ \ + u.fpscr = 0xfff80000ULL << 32; \ + u.fpscr |= __fpscr & 0xffffffffULL; \ + fr = u.d; \ + __asm__ ("mtfsf 255,%0" : : "f" (fr)); \ + fr = 0.0; \ + } + +void prime_special_regs(int which) +{ + ElfW(Addr) a_val; + + di_fpscr_t di_fpscr __attribute__ ((__aligned__(8))); + + a_val = query_auxv(AT_HWCAP); + if(a_val == -1) + { + puts ("querying the auxv for the hwcap failed"); + _exit (1); + } + + /* Indicates a 64-bit FPSCR. */ + if (a_val & PPC_FEATURE_HAS_DFP) + { + _GET_DI_FPSCR(di_fpscr); + + /* Overwrite the existing DRN and RN if there is one. */ + if (which == 0) + di_fpscr = ((di_fpscr & _FPSCR_RESERVED) | (_FPSCR_TEST0_DRN | _FPSCR_TEST0_RN)); + else + di_fpscr = ((di_fpscr & _FPSCR_RESERVED) | (_FPSCR_TEST1_DRN | _FPSCR_TEST1_RN)); + puts ("Priming 64-bit FPSCR with:"); + printf("0x%.16llx\n",(unsigned long long int)di_fpscr); + + _SET_DI_FPSCR(di_fpscr); + } + else + { + puts ("32-bit FPSCR found and will be tested."); + _GET_SI_FPSCR(di_fpscr); + + /* Overwrite the existing RN if there is one. */ + if (which == 0) + di_fpscr = ((di_fpscr & _FPSCR_RESERVED) | (_FPSCR_TEST0_RN)); + else + di_fpscr = ((di_fpscr & _FPSCR_RESERVED) | (_FPSCR_TEST1_RN)); + puts ("Priming 32-bit FPSCR with:"); + printf("0x%.8lx\n",(unsigned long int) di_fpscr); + + _SET_SI_FPSCR(di_fpscr); + } +} + +void clear_special_regs(void) +{ + ElfW(Addr) a_val; + + di_fpscr_t di_fpscr __attribute__ ((__aligned__(8))); + + union { + double d; + unsigned long long int lli; + unsigned int li[2]; + } dlli; + + a_val = query_auxv(AT_HWCAP); + if(a_val == -1) + { + puts ("querying the auxv for the hwcap failed"); + _exit (1); + } + +#if __WORDSIZE == 32 + dlli.d = ctx[0].uc_mcontext.uc_regs->fpregs.fpscr; +#else + dlli.d = ctx[0].uc_mcontext.fp_regs[32]; +#endif + + puts("The FPSCR value saved in the ucontext_t is:"); + + /* Indicates a 64-bit FPSCR. */ + if (a_val & PPC_FEATURE_HAS_DFP) + { + printf("0x%.16llx\n",dlli.lli); + di_fpscr = 0x0; + puts ("Clearing the 64-bit FPSCR to:"); + printf("0x%.16llx\n",(unsigned long long int) di_fpscr); + + _SET_DI_FPSCR(di_fpscr); + } + else + { + printf("0x%.8x\n",(unsigned int) dlli.li[1]); + di_fpscr = 0x0; + puts ("Clearing the 32-bit FPSCR to:"); + printf("0x%.8lx\n",(unsigned long int) di_fpscr); + + _SET_SI_FPSCR(di_fpscr); + } +} + +void test_special_regs(int which) +{ + ElfW(Addr) a_val; + unsigned long long int test; + + di_fpscr_t di_fpscr __attribute__ ((__aligned__(8))); + + a_val = query_auxv(AT_HWCAP); + if(a_val == -1) + { + puts ("querying the auxv for the hwcap failed"); + _exit (2); + } + + /* Indicates a 64-bit FPSCR. */ + if (a_val & PPC_FEATURE_HAS_DFP) + { + _GET_DI_FPSCR(di_fpscr); + + if (which == 0) + puts ("After setcontext the 64-bit FPSCR contains:"); + else + puts ("After swapcontext the 64-bit FPSCR contains:"); + + printf("0x%.16llx\n",(unsigned long long int) di_fpscr); + test = (_FPSCR_TEST0_DRN | _FPSCR_TEST0_RN); + if((di_fpscr & (test)) != (test)) + { + printf ("%s: DRN and RN bits set before getcontext were not preserved across [set|swap]context call: %m",__FUNCTION__); + _exit (3); + } + } + else + { + _GET_SI_FPSCR(di_fpscr); + if (which == 0) + puts ("After setcontext the 32-bit FPSCR contains:"); + else + puts ("After swapcontext the 32-bit FPSCR contains:"); + + printf("0x%.8lx\n",(unsigned long int) di_fpscr); + test = _FPSCR_TEST0_RN; + if((di_fpscr & test) != test) + { + printf ("%s: RN bit set before getcontext was not preserved across [set|swap]context call: %m",__FUNCTION__); + _exit (4); + } + } +} + + +static void +check_called (void) +{ + if (back_in_main == 0) + { + puts ("program did not reach main again"); + _exit (5); + } +} + + +int +main (void) +{ + atexit (check_called); + + puts ("priming the FPSCR with a marker"); + prime_special_regs (0); + + puts ("making contexts"); + if (getcontext (&ctx[0]) != 0) + { + if (errno == ENOSYS) + { + back_in_main = 1; + exit (0); + } + + printf ("%s: getcontext: %m\n", __FUNCTION__); + exit (6); + } + + /* Play some tricks with this context. */ + if (++global == 1) + { + clear_special_regs ( ); + if (setcontext (&ctx[0]) != 0) + { + printf ("%s: setcontext: %m\n", __FUNCTION__); + exit (7); + } + } + if (global != 2) + { + printf ("%s: 'global' not incremented twice\n", __FUNCTION__); + exit (8); + } + + test_special_regs (0); + + global = 0; + if (getcontext (&ctx[0]) != 0) + { + printf ("%s: getcontext: %m\n", __FUNCTION__); + exit (9); + } + + if (++global == 1) + { + puts ("priming the FPSCR with a marker"); + prime_special_regs (1); + + puts ("swapping contexts"); + if (swapcontext (&ctx[1], &ctx[0]) != 0) + { + printf ("%s: swapcontext: %m\n", __FUNCTION__); + exit (9); + } + } + if (global != 2) + { + printf ("%s: 'global' not incremented twice\n", __FUNCTION__); + exit (10); + } + + test_special_regs (1); + + puts ("back at main program"); + back_in_main = 1; + + puts ("test succeeded"); + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu_control.h b/REORG.TODO/sysdeps/powerpc/fpu_control.h new file mode 100644 index 0000000000..842cbfa03f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu_control.h @@ -0,0 +1,120 @@ +/* FPU control word definitions. PowerPC version. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FPU_CONTROL_H +#define _FPU_CONTROL_H + +#ifdef _SOFT_FLOAT + +# define _FPU_RESERVED 0xffffffff +# define _FPU_DEFAULT 0x00000000 /* Default value. */ +typedef unsigned int fpu_control_t; +# define _FPU_GETCW(cw) (cw) = 0 +# define _FPU_SETCW(cw) (void) (cw) +extern fpu_control_t __fpu_control; + +#elif defined __NO_FPRS__ /* e500 */ + +/* rounding control */ +# define _FPU_RC_NEAREST 0x00 /* RECOMMENDED */ +# define _FPU_RC_DOWN 0x03 +# define _FPU_RC_UP 0x02 +# define _FPU_RC_ZERO 0x01 + +/* masking of interrupts */ +# define _FPU_MASK_ZM 0x10 /* zero divide */ +# define _FPU_MASK_OM 0x04 /* overflow */ +# define _FPU_MASK_UM 0x08 /* underflow */ +# define _FPU_MASK_XM 0x40 /* inexact */ +# define _FPU_MASK_IM 0x20 /* invalid operation */ + +# define _FPU_RESERVED 0x00c10080 /* These bits are reserved and not changed. */ + +/* Correct IEEE semantics require traps to be enabled at the hardware + level; the kernel then does the emulation and determines whether + generation of signals from those traps was enabled using prctl. */ +# define _FPU_DEFAULT 0x0000003c /* Default value. */ +# define _FPU_IEEE _FPU_DEFAULT + +/* Type of the control word. */ +typedef unsigned int fpu_control_t; + +/* Macros for accessing the hardware control word. */ +# define _FPU_GETCW(cw) \ + __asm__ volatile ("mfspefscr %0" : "=r" (cw)) +# define _FPU_SETCW(cw) \ + __asm__ volatile ("mtspefscr %0" : : "r" (cw)) + +/* Default control word set at startup. */ +extern fpu_control_t __fpu_control; + +#else /* PowerPC 6xx floating-point. */ + +/* rounding control */ +# define _FPU_RC_NEAREST 0x00 /* RECOMMENDED */ +# define _FPU_RC_DOWN 0x03 +# define _FPU_RC_UP 0x02 +# define _FPU_RC_ZERO 0x01 + +# define _FPU_MASK_NI 0x04 /* non-ieee mode */ + +/* masking of interrupts */ +# define _FPU_MASK_ZM 0x10 /* zero divide */ +# define _FPU_MASK_OM 0x40 /* overflow */ +# define _FPU_MASK_UM 0x20 /* underflow */ +# define _FPU_MASK_XM 0x08 /* inexact */ +# define _FPU_MASK_IM 0x80 /* invalid operation */ + +# define _FPU_RESERVED 0xffffff00 /* These bits are reserved are not changed. */ + +/* The fdlibm code requires no interrupts for exceptions. */ +# define _FPU_DEFAULT 0x00000000 /* Default value. */ + +/* IEEE: same as above, but (some) exceptions; + we leave the 'inexact' exception off. + */ +# define _FPU_IEEE 0x000000f0 + +/* Type of the control word. */ +typedef unsigned int fpu_control_t; + +/* Macros for accessing the hardware control word. */ +# define _FPU_GETCW(cw) \ + ({union { double __d; unsigned long long __ll; } __u; \ + register double __fr; \ + __asm__ ("mffs %0" : "=f" (__fr)); \ + __u.__d = __fr; \ + (cw) = (fpu_control_t) __u.__ll; \ + (fpu_control_t) __u.__ll; \ + }) + +# define _FPU_SETCW(cw) \ + { union { double __d; unsigned long long __ll; } __u; \ + register double __fr; \ + __u.__ll = 0xfff80000LL << 32; /* This is a QNaN. */ \ + __u.__ll |= (cw) & 0xffffffffLL; \ + __fr = __u.__d; \ + __asm__ ("mtfsf 255,%0" : : "f" (__fr)); \ + } + +/* Default control word set at startup. */ +extern fpu_control_t __fpu_control; + +#endif /* PowerPC 6xx floating-point. */ + +#endif /* _FPU_CONTROL_H */ diff --git a/REORG.TODO/sysdeps/powerpc/gccframe.h b/REORG.TODO/sysdeps/powerpc/gccframe.h new file mode 100644 index 0000000000..87edbae05b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/gccframe.h @@ -0,0 +1,21 @@ +/* Definition of object in frame unwind info. powerpc version. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define DWARF_FRAME_REGISTERS 77 + +#include <sysdeps/generic/gccframe.h> diff --git a/REORG.TODO/sysdeps/powerpc/hwcapinfo.c b/REORG.TODO/sysdeps/powerpc/hwcapinfo.c new file mode 100644 index 0000000000..82ad222c36 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/hwcapinfo.c @@ -0,0 +1,84 @@ +/* powerpc HWCAP/HWCAP2 and AT_PLATFORM data pre-processing. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <unistd.h> +#include <shlib-compat.h> +#include <dl-procinfo.h> + +uint64_t __tcb_hwcap __attribute__ ((visibility ("hidden"))); +uint32_t __tcb_platform __attribute__ ((visibility ("hidden"))); + +/* This function parses the HWCAP/HWCAP2 fields, adding the previous supported + ISA bits, as well as converting the AT_PLATFORM string to a number. This + data is stored in two global variables that can be used later by the + powerpc-specific code to store it into the TCB. */ +void +__tcb_parse_hwcap_and_convert_at_platform (void) +{ + + uint64_t h1, h2; + + /* Read AT_PLATFORM string from auxv and convert it to a number. */ + __tcb_platform = _dl_string_platform (GLRO (dl_platform)); + + /* Read HWCAP and HWCAP2 from auxv. */ + h1 = GLRO (dl_hwcap); + h2 = GLRO (dl_hwcap2); + + /* hwcap contains only the latest supported ISA, the code checks which is + and fills the previous supported ones. */ + + if (h2 & PPC_FEATURE2_ARCH_2_07) + h1 |= PPC_FEATURE_ARCH_2_06 + | PPC_FEATURE_ARCH_2_05 + | PPC_FEATURE_POWER5_PLUS + | PPC_FEATURE_POWER5 + | PPC_FEATURE_POWER4; + else if (h1 & PPC_FEATURE_ARCH_2_06) + h1 |= PPC_FEATURE_ARCH_2_05 + | PPC_FEATURE_POWER5_PLUS + | PPC_FEATURE_POWER5 + | PPC_FEATURE_POWER4; + else if (h1 & PPC_FEATURE_ARCH_2_05) + h1 |= PPC_FEATURE_POWER5_PLUS + | PPC_FEATURE_POWER5 + | PPC_FEATURE_POWER4; + else if (h1 & PPC_FEATURE_POWER5_PLUS) + h1 |= PPC_FEATURE_POWER5 + | PPC_FEATURE_POWER4; + else if (h1 & PPC_FEATURE_POWER5) + h1 |= PPC_FEATURE_POWER4; + + /* Consolidate both HWCAP and HWCAP2 into a single doubleword so that + we can read both in a single load later. */ + __tcb_hwcap = h2; + __tcb_hwcap = (h1 << 32) | __tcb_hwcap; + +} +#if IS_IN (rtld) +versioned_symbol (ld, __tcb_parse_hwcap_and_convert_at_platform, \ + __parse_hwcap_and_convert_at_platform, GLIBC_2_23); +#endif + +/* Export __parse_hwcap_and_convert_at_platform in libc.a. This is used by + GCC to make sure that the HWCAP/Platform bits are stored in the TCB when + using __builtin_cpu_is()/__builtin_cpu_supports() in the static case. */ +#ifndef SHARED +weak_alias (__tcb_parse_hwcap_and_convert_at_platform, \ + __parse_hwcap_and_convert_at_platform); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/hwcapinfo.h b/REORG.TODO/sysdeps/powerpc/hwcapinfo.h new file mode 100644 index 0000000000..830948a855 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/hwcapinfo.h @@ -0,0 +1,29 @@ +/* powerpc HWCAP/HWCAP2 and AT_PLATFORM data pre-processing. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +#ifndef HWCAPINFO_H +# define HWCAPINFO_H + +extern uint64_t __tcb_hwcap attribute_hidden; +extern uint32_t __tcb_platform attribute_hidden; + +extern void __tcb_parse_hwcap_and_convert_at_platform (void); + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/ifunc-sel.h b/REORG.TODO/sysdeps/powerpc/ifunc-sel.h new file mode 100644 index 0000000000..bdb00bf2c6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/ifunc-sel.h @@ -0,0 +1,51 @@ +/* Used by the elf ifunc tests. */ +#ifndef ELF_IFUNC_SEL_H +#define ELF_IFUNC_SEL_H 1 + +extern int global; + +static inline void * +inhibit_stack_protector +ifunc_sel (int (*f1) (void), int (*f2) (void), int (*f3) (void)) +{ + register void *ret __asm__ ("r3"); + __asm__ ("mflr 12\n\t" + "bcl 20,31,1f\n" + "1:\tmflr 11\n\t" + "mtlr 12\n\t" + "addis 12,11,global-1b@ha\n\t" + "lwz 12,global-1b@l(12)\n\t" + "addis %0,11,%2-1b@ha\n\t" + "addi %0,%0,%2-1b@l\n\t" + "cmpwi 12,1\n\t" + "beq 2f\n\t" + "addis %0,11,%3-1b@ha\n\t" + "addi %0,%0,%3-1b@l\n\t" + "cmpwi 12,-1\n\t" + "beq 2f\n\t" + "addis %0,11,%4-1b@ha\n\t" + "addi %0,%0,%4-1b@l\n\t" + "2:" + : "=r" (ret) + : "i" (&global), "i" (f1), "i" (f2), "i" (f3) + : "11", "12", "cr0"); + return ret; +} + +static inline void * +inhibit_stack_protector +ifunc_one (int (*f1) (void)) +{ + register void *ret __asm__ ("r3"); + __asm__ ("mflr 12\n\t" + "bcl 20,31,1f\n" + "1:\tmflr %0\n\t" + "mtlr 12\n\t" + "addis %0,%0,%1-1b@ha\n\t" + "addi %0,%0,%1-1b@l" + : "=r" (ret) + : "i" (f1) + : "12"); + return ret; +} +#endif diff --git a/REORG.TODO/sysdeps/powerpc/jmpbuf-offsets.h b/REORG.TODO/sysdeps/powerpc/jmpbuf-offsets.h new file mode 100644 index 0000000000..e748a35f4d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/jmpbuf-offsets.h @@ -0,0 +1,36 @@ +/* Private macros for accessing __jmp_buf contents. PowerPC version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define JB_GPR1 0 /* Also known as the stack pointer */ +#define JB_GPR2 1 +#define JB_LR 2 /* The address we will return to */ +#if __WORDSIZE == 64 +# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18*2 words total. */ +# define JB_CR 21 /* Shared dword with VRSAVE. CR word at offset 172. */ +# define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */ +# define JB_SIZE (64 * 8) /* As per PPC64-VMX ABI. */ +# define JB_VRSAVE 21 /* Shared dword with CR. VRSAVE word at offset 168. */ +# define JB_VRS 40 /* VRs 20 through 31 are saved, 12*4 words total. */ +#else +# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18 in total. */ +# define JB_CR 21 /* Condition code registers. */ +# define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */ +# define JB_SIZE ((64 + (12 * 4)) * 4) +# define JB_VRSAVE 62 +# define JB_VRS 64 +#endif diff --git a/REORG.TODO/sysdeps/powerpc/jmpbuf-unwind.h b/REORG.TODO/sysdeps/powerpc/jmpbuf-unwind.h new file mode 100644 index 0000000000..96e2af9e3f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/jmpbuf-unwind.h @@ -0,0 +1,47 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <setjmp.h> +#include <jmpbuf-offsets.h> +#include <stdint.h> +#include <unwind.h> +#include <sysdep.h> + +/* Test if longjmp to JMPBUF would unwind the frame + containing a local variable at ADDRESS. */ +#define _JMPBUF_UNWINDS(jmpbuf, address, demangle) \ + ((void *) (address) < (void *) demangle ((jmpbuf)[JB_GPR1])) + +#define _JMPBUF_CFA_UNWINDS_ADJ(_jmpbuf, _context, _adj) \ + _JMPBUF_UNWINDS_ADJ (_jmpbuf, (void *) _Unwind_GetCFA (_context), _adj) + +static inline uintptr_t __attribute__ ((unused)) +_jmpbuf_sp (__jmp_buf regs) +{ + uintptr_t sp = regs[JB_GPR1]; +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (sp); +#endif + return sp; +} + +#define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \ + ((uintptr_t) (_address) - (_adj) < _jmpbuf_sp (_jmpbuf) - (_adj)) + +/* We use the normal longjmp for unwinding. */ +#define __libc_unwind_longjmp(buf, val) __libc_longjmp (buf, val) diff --git a/REORG.TODO/sysdeps/powerpc/ldsodefs.h b/REORG.TODO/sysdeps/powerpc/ldsodefs.h new file mode 100644 index 0000000000..466de797fc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/ldsodefs.h @@ -0,0 +1,68 @@ +/* Run-time dynamic linker data structures for loaded ELF shared objects. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _POWERPC_LDSODEFS_H +#define _POWERPC_LDSODEFS_H 1 + +#include <elf.h> + +struct La_ppc32_regs; +struct La_ppc32_retval; +struct La_ppc64_regs; +struct La_ppc64_retval; +struct La_ppc64v2_regs; +struct La_ppc64v2_retval; + +#define ARCH_PLTENTER_MEMBERS \ + Elf32_Addr (*ppc32_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *, \ + uintptr_t *, struct La_ppc32_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); \ + Elf64_Addr (*ppc64_gnu_pltenter) (Elf64_Sym *, unsigned int, uintptr_t *, \ + uintptr_t *, struct La_ppc64_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); \ + Elf64_Addr (*ppc64v2_gnu_pltenter) (Elf64_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + struct La_ppc64v2_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep) + +#define ARCH_PLTEXIT_MEMBERS \ + unsigned int (*ppc32_gnu_pltexit) (Elf32_Sym *, unsigned int, \ + uintptr_t *, \ + uintptr_t *, \ + const struct La_ppc32_regs *, \ + struct La_ppc32_retval *, \ + const char *); \ + unsigned int (*ppc64_gnu_pltexit) (Elf64_Sym *, unsigned int, \ + uintptr_t *, \ + uintptr_t *, \ + const struct La_ppc64_regs *, \ + struct La_ppc64_retval *, \ + const char *); \ + unsigned int (*ppc64v2_gnu_pltexit) (Elf64_Sym *, unsigned int, \ + uintptr_t *, \ + uintptr_t *, \ + const struct La_ppc64v2_regs *,\ + struct La_ppc64v2_retval *, \ + const char *) + +#include_next <ldsodefs.h> + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/libc-tls.c b/REORG.TODO/sysdeps/powerpc/libc-tls.c new file mode 100644 index 0000000000..76d5c9abb9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/libc-tls.c @@ -0,0 +1,32 @@ +/* Thread-local storage handling in the ELF dynamic linker. PowerPC version. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <csu/libc-tls.c> +#include <dl-tls.h> + +/* On powerpc, the linker usually optimizes code sequences used to access + Thread Local Storage. However, when the user disables these optimizations + by passing --no-tls-optimze to the linker, we need to provide __tls_get_addr + in static libc in order to avoid undefined references to that symbol. */ + +void * +__tls_get_addr (tls_index *ti) +{ + dtv_t *dtv = THREAD_DTV (); + return (char *) dtv[1].pointer.val + ti->ti_offset + TLS_DTV_OFFSET; +} diff --git a/REORG.TODO/sysdeps/powerpc/locale-defines.sym b/REORG.TODO/sysdeps/powerpc/locale-defines.sym new file mode 100644 index 0000000000..5c5379c39f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/locale-defines.sym @@ -0,0 +1,9 @@ +#include <locale/localeinfo.h> + +-- + +LOCALE_CTYPE_TOLOWER offsetof (struct __locale_struct, __ctype_tolower) +LOCALE_CTYPE_TOUPPER offsetof (struct __locale_struct, __ctype_toupper) +_NL_CTYPE_NONASCII_CASE +LOCALE_DATA_VALUES offsetof (struct __locale_data, values) +SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0]) diff --git a/REORG.TODO/sysdeps/powerpc/longjmp.c b/REORG.TODO/sysdeps/powerpc/longjmp.c new file mode 100644 index 0000000000..bd3ed8c22b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/longjmp.c @@ -0,0 +1,60 @@ +/* Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Versioned copy of sysdeps/generic/longjmp.c modified for AltiVec support. */ + +#include <shlib-compat.h> +#include <stddef.h> +#include <setjmp.h> +#include <signal.h> + +extern void __vmx__longjmp (__jmp_buf __env, int __val) + __attribute__ ((noreturn)); +extern void __vmx__libc_longjmp (sigjmp_buf env, int val) + __attribute__ ((noreturn)); +libc_hidden_proto (__vmx__libc_longjmp) + +/* Set the signal mask to the one specified in ENV, and jump + to the position specified in ENV, causing the setjmp + call there to return VAL, or 1 if VAL is 0. */ +void +__vmx__libc_siglongjmp (sigjmp_buf env, int val) +{ + /* Perform any cleanups needed by the frames being unwound. */ + _longjmp_unwind (env, val); + + if (env[0].__mask_was_saved) + /* Restore the saved signal mask. */ + (void) __sigprocmask (SIG_SETMASK, &env[0].__saved_mask, + (sigset_t *) NULL); + + /* Call the machine-dependent function to restore machine state. */ + __vmx__longjmp (env[0].__jmpbuf, val ?: 1); +} + +strong_alias (__vmx__libc_siglongjmp, __vmx__libc_longjmp) +libc_hidden_def (__vmx__libc_longjmp) +weak_alias (__vmx__libc_siglongjmp, __vmx_longjmp) +weak_alias (__vmx__libc_siglongjmp, __vmxlongjmp) +weak_alias (__vmx__libc_siglongjmp, __vmxsiglongjmp) + + +default_symbol_version (__vmx__libc_longjmp, __libc_longjmp, GLIBC_PRIVATE); +default_symbol_version (__vmx__libc_siglongjmp, __libc_siglongjmp, GLIBC_PRIVATE); +versioned_symbol (libc, __vmx_longjmp, _longjmp, GLIBC_2_3_4); +versioned_symbol (libc, __vmxlongjmp, longjmp, GLIBC_2_3_4); +versioned_symbol (libc, __vmxsiglongjmp, siglongjmp, GLIBC_2_3_4); diff --git a/REORG.TODO/sysdeps/powerpc/machine-gmon.h b/REORG.TODO/sysdeps/powerpc/machine-gmon.h new file mode 100644 index 0000000000..3078426a6d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/machine-gmon.h @@ -0,0 +1,30 @@ +/* PowerPC-specific implementation of profiling support. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* We need a special version of the `mcount' function because it has + to preserve more registers than your usual function. */ + +void __mcount_internal (unsigned long frompc, unsigned long selfpc); + +#define _MCOUNT_DECL(frompc, selfpc) \ +void __mcount_internal (unsigned long frompc, unsigned long selfpc) + + +/* Define MCOUNT as empty since we have the implementation in another + file. */ +#define MCOUNT diff --git a/REORG.TODO/sysdeps/powerpc/math-tests.h b/REORG.TODO/sysdeps/powerpc/math-tests.h new file mode 100644 index 0000000000..f7ba200cfc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/math-tests.h @@ -0,0 +1,33 @@ +/* Configuration for math tests. PowerPC version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* On PowerPC, in versions of GCC up to at least 4.7.2, a type cast -- which is + a IEEE 754-2008 general-computational convertFormat operation (IEEE + 754-2008, 5.4.2) -- does not turn a sNaN into a qNaN (whilst raising an + INVALID exception), which is contrary to IEEE 754-2008 5.1 and 7.2. This + renders certain tests infeasible in this scenario. + <http://gcc.gnu.org/PR56828>. */ +#define SNAN_TESTS_TYPE_CAST 0 + +#ifndef __NO_FPRS__ +/* Setting exception flags in FPSCR results in enabled traps for those + exceptions being taken. */ +# define EXCEPTION_SET_FORCES_TRAP 1 +#endif + +#include_next <math-tests.h> diff --git a/REORG.TODO/sysdeps/powerpc/memusage.h b/REORG.TODO/sysdeps/powerpc/memusage.h new file mode 100644 index 0000000000..69f098006a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/memusage.h @@ -0,0 +1,20 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define GETSP() ({ register uintptr_t stack_ptr asm ("%r1"); stack_ptr; }) + +#include <sysdeps/generic/memusage.h> diff --git a/REORG.TODO/sysdeps/powerpc/mp_clz_tab.c b/REORG.TODO/sysdeps/powerpc/mp_clz_tab.c new file mode 100644 index 0000000000..4b5f29bfe5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/mp_clz_tab.c @@ -0,0 +1 @@ +/* __clz_tab not needed on powerpc. */ diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/Makefile b/REORG.TODO/sysdeps/powerpc/nofpu/Makefile new file mode 100644 index 0000000000..35517b63a1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/Makefile @@ -0,0 +1,32 @@ +# Makefile fragment for PowerPC with no FPU. + +ifeq ($(subdir),soft-fp) +sysdep_routines += $(gcc-single-routines) $(gcc-double-routines) \ + sim-full atomic-feholdexcept atomic-feclearexcept \ + atomic-feupdateenv flt-rounds +endif + +ifeq ($(subdir),math) +libm-support += fenv_const +CPPFLAGS += -I../soft-fp/ +# The follow CFLAGS are a work around for GCC Bugzilla Bug 29253 +# "expand_abs wrong default code for floating point" +# As this is not a regression, a fix is not likely to go into +# gcc-4.1.1 and may be too late for gcc-4.2. So we need these flags +# until the fix in a gcc release and glibc drops support for earlier +# versions of gcc. +CFLAGS-e_atan2l.c += -fno-builtin-fabsl +CFLAGS-e_hypotl.c += -fno-builtin-fabsl +CFLAGS-e_powl.c += -fno-builtin-fabsl +CFLAGS-s_ccoshl.c += -fno-builtin-fabsl +CFLAGS-s_csinhl.c += -fno-builtin-fabsl +CFLAGS-s_clogl.c += -fno-builtin-fabsl +CFLAGS-s_clog10l.c += -fno-builtin-fabsl +CFLAGS-s_csinl.c += -fno-builtin-fabsl +CFLAGS-s_csqrtl.c += -fno-builtin-fabsl +CFLAGS-w_acosl_compat.c += -fno-builtin-fabsl +CFLAGS-w_asinl_compat.c += -fno-builtin-fabsl +CFLAGS-w_atanhl_compat.c += -fno-builtin-fabsl +CFLAGS-w_j0l_compat.c += -fno-builtin-fabsl +CFLAGS-w_j1l_compat.c += -fno-builtin-fabsl +endif diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/Subdirs b/REORG.TODO/sysdeps/powerpc/nofpu/Subdirs new file mode 100644 index 0000000000..87eadf3024 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/Subdirs @@ -0,0 +1 @@ +soft-fp diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/Versions b/REORG.TODO/sysdeps/powerpc/nofpu/Versions new file mode 100644 index 0000000000..9f569bd1a5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/Versions @@ -0,0 +1,29 @@ +libc { + GLIBC_2.3.2 { + __sim_exceptions; __sim_disabled_exceptions; __sim_round_mode; + __adddf3; __addsf3; __divdf3; __divsf3; __eqdf2; __eqsf2; + __extendsfdf2; __fixdfsi; __fixsfsi; + __fixunsdfsi; __fixunssfsi; + __floatsidf; __floatsisf; + __gedf2; __gesf2; __ledf2; __lesf2; __muldf3; __mulsf3; + __negdf2; __negsf2; __sqrtdf2; __sqrtsf2; __subdf3; + __subsf3; __truncdfsf2; + } + GLIBC_2.4 { + __floatundidf; __floatundisf; + __floatunsidf; __floatunsisf; + __unorddf2; __unordsf2; + __nedf2; __nesf2; + __gtdf2; __gtsf2; + __ltdf2; __ltsf2; + } + GLIBC_2.19 { + __atomic_feholdexcept; __atomic_feclearexcept; __atomic_feupdateenv; + __flt_rounds; + } + GLIBC_PRIVATE { + __sim_exceptions_thread; + __sim_disabled_exceptions_thread; + __sim_round_mode_thread; + } +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feclearexcept.c b/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feclearexcept.c new file mode 100644 index 0000000000..6946e19337 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feclearexcept.c @@ -0,0 +1,28 @@ +/* Clear floating-point exceptions for atomic compound assignment. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +void +__atomic_feclearexcept (void) +{ + /* This function postdates the global variables being turned into + compat symbols, so no need to set them. */ + __sim_exceptions_thread = 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feholdexcept.c b/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feholdexcept.c new file mode 100644 index 0000000000..50cf1b96ae --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feholdexcept.c @@ -0,0 +1,38 @@ +/* Store current floating-point environment and clear exceptions for + atomic compound assignment. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +void +__atomic_feholdexcept (fenv_t *envp) +{ + fenv_union_t u; + + u.l[0] = __sim_exceptions_thread; + /* The rounding mode is not changed by arithmetic, so no need to + save it. */ + u.l[1] = __sim_disabled_exceptions_thread; + *envp = u.fenv; + + /* This function postdates the global variables being turned into + compat symbols, so no need to set them. */ + __sim_exceptions_thread = 0; + __sim_disabled_exceptions_thread = FE_ALL_EXCEPT; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feupdateenv.c b/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feupdateenv.c new file mode 100644 index 0000000000..e74178ff12 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feupdateenv.c @@ -0,0 +1,37 @@ +/* Install given floating-point environment and raise exceptions for + atomic compound assignment. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" +#include <signal.h> + +void +__atomic_feupdateenv (const fenv_t *envp) +{ + fenv_union_t u; + int saved_exceptions = __sim_exceptions_thread; + + /* This function postdates the global variables being turned into + compat symbols, so no need to set them. */ + u.fenv = *envp; + __sim_exceptions_thread |= u.l[0]; + __sim_disabled_exceptions_thread = u.l[1]; + if (saved_exceptions & ~__sim_disabled_exceptions_thread) + raise (SIGFPE); +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fclrexcpt.c b/REORG.TODO/sysdeps/powerpc/nofpu/fclrexcpt.c new file mode 100644 index 0000000000..1c8e578afc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fclrexcpt.c @@ -0,0 +1,38 @@ +/* Clear floating-point exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__feclearexcept (int x) +{ + __sim_exceptions_thread &= ~x; + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feclearexcept, __old_feclearexcept) +compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1); +#endif + +libm_hidden_ver (__feclearexcept, feclearexcept) +versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fedisblxcpt.c b/REORG.TODO/sysdeps/powerpc/nofpu/fedisblxcpt.c new file mode 100644 index 0000000000..6514f45a73 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fedisblxcpt.c @@ -0,0 +1,34 @@ +/* Disable exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" +#include <fenv.h> + +int +fedisableexcept (int x) +{ + int old_exceptions = ~__sim_disabled_exceptions_thread & FE_ALL_EXCEPT; + + __sim_disabled_exceptions_thread |= x; + SIM_SET_GLOBAL (__sim_disabled_exceptions_global, + __sim_disabled_exceptions_thread); + + return old_exceptions; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/feenablxcpt.c b/REORG.TODO/sysdeps/powerpc/nofpu/feenablxcpt.c new file mode 100644 index 0000000000..cd71e4e9cd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/feenablxcpt.c @@ -0,0 +1,33 @@ +/* Enable exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-supp.h" +#include <fenv.h> + +int +feenableexcept (int exceptions) +{ + int old_exceptions = ~__sim_disabled_exceptions_thread & FE_ALL_EXCEPT; + + __sim_disabled_exceptions_thread &= ~exceptions; + SIM_SET_GLOBAL (__sim_disabled_exceptions_global, + __sim_disabled_exceptions_thread); + + return old_exceptions; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fegetenv.c b/REORG.TODO/sysdeps/powerpc/nofpu/fegetenv.c new file mode 100644 index 0000000000..079946c58e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fegetenv.c @@ -0,0 +1,45 @@ +/* Store current floating-point environment (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002, 2010. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__fegetenv (fenv_t *envp) +{ + fenv_union_t u; + + u.l[0] = __sim_exceptions_thread; + u.l[0] |= __sim_round_mode_thread; + u.l[1] = __sim_disabled_exceptions_thread; + + *envp = u.fenv; + + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetenv, __old_fegetenv) +compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fegetenv) +libm_hidden_ver (__fegetenv, fegetenv) +versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fegetexcept.c b/REORG.TODO/sysdeps/powerpc/nofpu/fegetexcept.c new file mode 100644 index 0000000000..36f4f45b36 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fegetexcept.c @@ -0,0 +1,27 @@ +/* Get floating-point exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +fegetexcept (void) +{ + return (__sim_disabled_exceptions_thread ^ FE_ALL_EXCEPT) & FE_ALL_EXCEPT; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fegetmode.c b/REORG.TODO/sysdeps/powerpc/nofpu/fegetmode.c new file mode 100644 index 0000000000..2346858eda --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fegetmode.c @@ -0,0 +1,33 @@ +/* Store current floating-point control modes. PowerPC soft-float version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +fegetmode (femode_t *modep) +{ + fenv_union_t u; + + u.l[0] = __sim_round_mode_thread; + u.l[1] = __sim_disabled_exceptions_thread; + + *modep = u.fenv; + + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fegetround.c b/REORG.TODO/sysdeps/powerpc/nofpu/fegetround.c new file mode 100644 index 0000000000..7d7dfbaeef --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fegetround.c @@ -0,0 +1,30 @@ +/* Return current rounding mode (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__fegetround (void) +{ + return __sim_round_mode_thread; +} +libm_hidden_def (__fegetround) +weak_alias (__fegetround, fegetround) +libm_hidden_weak (fegetround) diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/feholdexcpt.c b/REORG.TODO/sysdeps/powerpc/nofpu/feholdexcpt.c new file mode 100644 index 0000000000..b0dce0bed1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/feholdexcpt.c @@ -0,0 +1,45 @@ +/* Store current floating-point environment and clear exceptions + (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__feholdexcept (fenv_t *envp) +{ + fenv_union_t u; + + /* Get the current state. */ + __fegetenv (envp); + + u.fenv = *envp; + /* Clear everything except the rounding mode. */ + u.l[0] &= 0x3; + /* Disable exceptions */ + u.l[1] = FE_ALL_EXCEPT; + + /* Put the new state in effect. */ + __fesetenv (&u.fenv); + + return 0; +} +libm_hidden_def (__feholdexcept) +weak_alias (__feholdexcept, feholdexcept) +libm_hidden_weak (feholdexcept) diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fenv_const.c b/REORG.TODO/sysdeps/powerpc/nofpu/fenv_const.c new file mode 100644 index 0000000000..451070bcdf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fenv_const.c @@ -0,0 +1,38 @@ +/* Constants for fenv_bits.h (soft float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* We want to specify the bit pattern of the __fe_*_env constants, so + pretend they're really `long long' instead of `double'. */ + +/* If the default argument is used we use this value. Disable all + signalling exceptions as default. */ +const unsigned long long __fe_dfl_env __attribute__ ((aligned (8))) = +0x000000003e000000ULL; + +/* The same representation is used for femode_t. */ +extern const unsigned long long __fe_dfl_mode + __attribute__ ((aligned (8), alias ("__fe_dfl_env"))); + +/* Floating-point environment where none of the exceptions are masked. */ +const unsigned long long __fe_enabled_env __attribute__ ((aligned (8))) = +0x0000000000000000ULL; + +/* Floating-point environment with the NI bit set. No difference for + soft float from the default environment. */ +strong_alias (__fe_dfl_env, __fe_nonieee_env) diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fesetenv.c b/REORG.TODO/sysdeps/powerpc/nofpu/fesetenv.c new file mode 100644 index 0000000000..f6ef1737bd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fesetenv.c @@ -0,0 +1,47 @@ +/* Set floating point environment (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__fesetenv (const fenv_t *envp) +{ + fenv_union_t u; + + u.fenv = *envp; + __sim_exceptions_thread = u.l[0] & FE_ALL_EXCEPT; + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + __sim_round_mode_thread = u.l[0] & 0x3; + SIM_SET_GLOBAL (__sim_round_mode_global, __sim_round_mode_thread); + __sim_disabled_exceptions_thread = u.l[1]; + SIM_SET_GLOBAL (__sim_disabled_exceptions_global, + __sim_disabled_exceptions_thread); + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetenv, __old_fesetenv) +compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fesetenv) +libm_hidden_ver (__fesetenv, fesetenv) +versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fesetexcept.c b/REORG.TODO/sysdeps/powerpc/nofpu/fesetexcept.c new file mode 100644 index 0000000000..c22ee2e952 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fesetexcept.c @@ -0,0 +1,28 @@ +/* Set given exception flags. PowerPC soft-float version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-supp.h" + +int +fesetexcept (int excepts) +{ + __sim_exceptions_thread |= (excepts & FE_ALL_EXCEPT); + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fesetmode.c b/REORG.TODO/sysdeps/powerpc/nofpu/fesetmode.c new file mode 100644 index 0000000000..85ce8b3ae1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fesetmode.c @@ -0,0 +1,34 @@ +/* Install given floating-point control modes. PowerPC soft-float version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +fesetmode (const femode_t *modep) +{ + fenv_union_t u; + + u.fenv = *modep; + __sim_round_mode_thread = u.l[0]; + SIM_SET_GLOBAL (__sim_round_mode_global, __sim_round_mode_thread); + __sim_disabled_exceptions_thread = u.l[1]; + SIM_SET_GLOBAL (__sim_disabled_exceptions_global, + __sim_disabled_exceptions_thread); + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fesetround.c b/REORG.TODO/sysdeps/powerpc/nofpu/fesetround.c new file mode 100644 index 0000000000..2ca60b5014 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fesetround.c @@ -0,0 +1,36 @@ +/* Set rounding mode (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__fesetround (int round) +{ + if ((unsigned int) round > FE_DOWNWARD) + return 1; + + __sim_round_mode_thread = round; + SIM_SET_GLOBAL (__sim_round_mode_global, __sim_round_mode_thread); + + return 0; +} +libm_hidden_def (__fesetround) +weak_alias (__fesetround, fesetround) +libm_hidden_weak (fesetround) diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/feupdateenv.c b/REORG.TODO/sysdeps/powerpc/nofpu/feupdateenv.c new file mode 100644 index 0000000000..316b7a96f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/feupdateenv.c @@ -0,0 +1,53 @@ +/* Install given floating-point environment and raise exceptions + (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" +#include <signal.h> + +int +__feupdateenv (const fenv_t *envp) +{ + int saved_exceptions; + + /* Save currently set exceptions. */ + saved_exceptions = __sim_exceptions_thread; + + /* Set environment. */ + __fesetenv (envp); + + /* Raise old exceptions. */ + __sim_exceptions_thread |= saved_exceptions; + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + if (saved_exceptions & ~__sim_disabled_exceptions_thread) + raise (SIGFPE); + + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feupdateenv, __old_feupdateenv) +compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1); +#endif + +libm_hidden_def (__feupdateenv) +libm_hidden_ver (__feupdateenv, feupdateenv) +versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fgetexcptflg.c b/REORG.TODO/sysdeps/powerpc/nofpu/fgetexcptflg.c new file mode 100644 index 0000000000..4247be2352 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fgetexcptflg.c @@ -0,0 +1,37 @@ +/* Store current representation for exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__fegetexceptflag (fexcept_t *flagp, int excepts) +{ + *flagp = (fexcept_t) __sim_exceptions_thread & excepts & FE_ALL_EXCEPT; + + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetexceptflag, __old_fegetexceptflag) +compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/flt-rounds.c b/REORG.TODO/sysdeps/powerpc/nofpu/flt-rounds.c new file mode 100644 index 0000000000..744ba95d60 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/flt-rounds.c @@ -0,0 +1,40 @@ +/* Return current rounding mode as correct value for FLT_ROUNDS. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__flt_rounds (void) +{ + switch (__sim_round_mode_thread) + { + case FP_RND_ZERO: + return 0; + case FP_RND_NEAREST: + return 1; + case FP_RND_PINF: + return 2; + case FP_RND_MINF: + return 3; + default: + abort (); + } +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fraiseexcpt.c b/REORG.TODO/sysdeps/powerpc/nofpu/fraiseexcpt.c new file mode 100644 index 0000000000..585c1c7d95 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fraiseexcpt.c @@ -0,0 +1,43 @@ +/* Raise given exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" +#include <signal.h> + +#undef feraiseexcept +int +__feraiseexcept (int x) +{ + __sim_exceptions_thread |= x; + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + if (x & ~__sim_disabled_exceptions_thread) + raise (SIGFPE); + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feraiseexcept, __old_feraiseexcept) +compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1); +#endif + +libm_hidden_def (__feraiseexcept) +libm_hidden_ver (__feraiseexcept, feraiseexcept) +versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fsetexcptflg.c b/REORG.TODO/sysdeps/powerpc/nofpu/fsetexcptflg.c new file mode 100644 index 0000000000..10b64285d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fsetexcptflg.c @@ -0,0 +1,40 @@ +/* Set floating-point environment exception handling (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__fesetexceptflag(const fexcept_t *flagp, int excepts) +{ + /* Ignore exceptions not listed in 'excepts'. */ + __sim_exceptions_thread + = (__sim_exceptions_thread & ~excepts) | (*flagp & excepts); + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetexceptflag, __old_fesetexceptflag) +compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/ftestexcept.c b/REORG.TODO/sysdeps/powerpc/nofpu/ftestexcept.c new file mode 100644 index 0000000000..56c804e558 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/ftestexcept.c @@ -0,0 +1,28 @@ +/* Test floating-point exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +fetestexcept (int x) +{ + return __sim_exceptions_thread & x; +} +libm_hidden_def (fetestexcept) diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/get-rounding-mode.h b/REORG.TODO/sysdeps/powerpc/nofpu/get-rounding-mode.h new file mode 100644 index 0000000000..4e6206e121 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/get-rounding-mode.h @@ -0,0 +1,35 @@ +/* Determine floating-point rounding mode within libc. PowerPC + soft-float version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _POWERPC_NOFPU_GET_ROUNDING_MODE_H +#define _POWERPC_NOFPU_GET_ROUNDING_MODE_H 1 + +#include <fenv.h> + +#include "soft-supp.h" + +/* Return the floating-point rounding mode. */ + +static inline int +get_rounding_mode (void) +{ + return __sim_round_mode_thread; +} + +#endif /* get-rounding-mode.h */ diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/libm-test-ulps b/REORG.TODO/sysdeps/powerpc/nofpu/libm-test-ulps new file mode 100644 index 0000000000..8935f0d714 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/libm-test-ulps @@ -0,0 +1,2364 @@ +# Begin of automatic generation + +# Maximal error of functions: +Function: "acos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "acos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "acos_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "acosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "acosh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "acosh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "acosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 4 + +Function: "asin": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "asin_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "asinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "asinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 7 +ldouble: 7 + +Function: "atan": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan2": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "atan2_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "atan2_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "atan2_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "atan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "atan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "atanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "cabs": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_downward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_towardzero": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_upward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cacos": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacos": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cacos_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "cacos_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "cacos_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "cacos_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "cacos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "cacos_upward": +double: 5 +float: 7 +idouble: 5 +ifloat: 7 +ildouble: 13 +ldouble: 13 + +Function: Real part of "cacosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "cacosh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacosh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "cacosh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "cacosh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "cacosh_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Real part of "cacosh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 12 +ldouble: 12 + +Function: Imaginary part of "cacosh_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "carg": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "carg_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "carg_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "carg_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "casin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "casin": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "casin_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "casin_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "casin_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "casin_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "casin_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "casin_upward": +double: 5 +float: 7 +idouble: 5 +ifloat: 7 +ildouble: 13 +ldouble: 13 + +Function: Real part of "casinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "casinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casinh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "casinh_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "casinh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "casinh_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Real part of "casinh_upward": +double: 5 +float: 7 +idouble: 5 +ifloat: 7 +ildouble: 13 +ldouble: 13 + +Function: Imaginary part of "casinh_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Real part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "catan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "catan_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Real part of "catan_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "catan_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "catan_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catanh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "catanh_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Real part of "catanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catanh_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Real part of "catanh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "catanh_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: "cbrt": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: "cbrt_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "cbrt_upward": +double: 5 +float: 1 +idouble: 5 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccos_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccos_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccos_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccosh_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccosh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccosh_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "ccosh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cexp": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "cexp": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cexp_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 11 +ldouble: 11 + +Function: Imaginary part of "cexp_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 11 +ldouble: 11 + +Function: Real part of "cexp_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 11 +ldouble: 11 + +Function: Imaginary part of "cexp_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 11 +ldouble: 11 + +Function: Real part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "clog": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog10": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog10_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "clog10_downward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: Real part of "clog10_towardzero": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "clog10_towardzero": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: Real part of "clog10_upward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "clog10_upward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: Real part of "clog_downward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Real part of "clog_towardzero": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "clog_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 7 +ldouble: 7 + +Function: Real part of "clog_upward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "clog_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "cos": +float: 1 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "cos_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "cos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "cos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "cosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "cosh_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "cosh_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "cosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cpow": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "cpow": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cpow_downward": +double: 4 +float: 8 +idouble: 4 +ifloat: 8 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "cpow_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cpow_towardzero": +double: 4 +float: 8 +idouble: 4 +ifloat: 8 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "cpow_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cpow_upward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cpow_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csin": +ildouble: 1 +ldouble: 1 + +Function: Real part of "csin_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csin_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csin_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csinh": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csinh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csinh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csinh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csinh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csinh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "csqrt_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "csqrt_downward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "csqrt_upward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 12 +ldouble: 12 + +Function: Imaginary part of "csqrt_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "ctan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ctan": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ctan_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ctan_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: Real part of "ctan_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "ctan_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 13 +ldouble: 13 + +Function: Real part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: Real part of "ctanh": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ctanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctanh_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: Imaginary part of "ctanh_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ctanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 13 +ldouble: 13 + +Function: Imaginary part of "ctanh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: Real part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: "erf": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "erf_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erf_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erf_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erfc": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "erfc_downward": +double: 5 +float: 6 +idouble: 5 +ifloat: 6 +ildouble: 10 +ldouble: 10 + +Function: "erfc_towardzero": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 9 +ldouble: 9 + +Function: "erfc_upward": +double: 5 +float: 6 +idouble: 5 +ifloat: 6 +ildouble: 7 +ldouble: 7 + +Function: "exp": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp10": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 + +Function: "exp10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "exp10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "exp10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "exp2": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp2_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp_downward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp_towardzero": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp_upward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "expm1": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "expm1_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "expm1_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "expm1_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: "fma": +ildouble: 1 +ldouble: 1 + +Function: "fma_downward": +ildouble: 1 +ldouble: 1 + +Function: "fma_towardzero": +ildouble: 2 +ldouble: 2 + +Function: "fma_upward": +ildouble: 3 +ldouble: 3 + +Function: "fmod": +ildouble: 1 +ldouble: 1 + +Function: "fmod_downward": +ildouble: 1 +ldouble: 1 + +Function: "fmod_towardzero": +ildouble: 1 +ldouble: 1 + +Function: "fmod_upward": +ildouble: 1 +ldouble: 1 + +Function: "gamma": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "gamma_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 15 +ldouble: 15 + +Function: "gamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 16 +ldouble: 16 + +Function: "gamma_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 11 +ldouble: 11 + +Function: "hypot": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_downward": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: "hypot_towardzero": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: "hypot_upward": +double: 1 +idouble: 1 +ildouble: 3 +ldouble: 3 + +Function: "j0": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "j0_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 11 +ldouble: 11 + +Function: "j0_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 8 +ldouble: 8 + +Function: "j0_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "j1": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "j1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "j1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "j1_upward": +double: 3 +float: 5 +idouble: 3 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: "jn": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "jn_downward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: "jn_towardzero": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: "jn_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 5 +ldouble: 5 + +Function: "ldexp_downward": +ildouble: 1 +ldouble: 1 + +Function: "ldexp_upward": +ildouble: 1 +ldouble: 1 + +Function: "lgamma": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "lgamma_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 15 +ldouble: 15 + +Function: "lgamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 16 +ldouble: 16 + +Function: "lgamma_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 11 +ldouble: 11 + +Function: "log": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "log10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log10_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 1 +ldouble: 1 + +Function: "log10_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log10_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log1p": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "log1p_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log1p_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log1p_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log2": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "log2_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: "log2_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "log2_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "log_downward": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log_towardzero": +float: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "nextafter_downward": +ildouble: 1 +ldouble: 1 + +Function: "nextafter_upward": +ildouble: 1 +ldouble: 1 + +Function: "pow": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "pow10": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 + +Function: "pow10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "pow10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "pow10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "pow_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "pow_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "pow_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "scalb_downward": +ildouble: 1 +ldouble: 1 + +Function: "scalb_upward": +ildouble: 1 +ldouble: 1 + +Function: "scalbln_downward": +ildouble: 1 +ldouble: 1 + +Function: "scalbln_upward": +ildouble: 1 +ldouble: 1 + +Function: "scalbn_downward": +ildouble: 1 +ldouble: 1 + +Function: "scalbn_upward": +ildouble: 1 +ldouble: 1 + +Function: "sin": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sin_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "sin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "sin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "sincos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sincos_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "sincos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 7 +ldouble: 7 + +Function: "sincos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "sinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: "sinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "sinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: "sqrt": +ildouble: 1 +ldouble: 1 + +Function: "sqrt_downward": +ildouble: 1 +ldouble: 1 + +Function: "sqrt_towardzero": +ildouble: 1 +ldouble: 1 + +Function: "sqrt_upward": +ildouble: 1 +ldouble: 1 + +Function: "tan": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "tan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "tan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "tan_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "tanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "tanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "tanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "tanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: "tgamma": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "tgamma_downward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: "tgamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "tgamma_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "y0": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "y0_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 10 +ldouble: 10 + +Function: "y0_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: "y0_upward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: "y1": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "y1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "y1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "y1_upward": +double: 7 +float: 2 +idouble: 7 +ifloat: 2 +ildouble: 9 +ldouble: 9 + +Function: "yn": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "yn_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 10 +ldouble: 10 + +Function: "yn_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: "yn_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +# end of automatic generation diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/libm-test-ulps-name b/REORG.TODO/sysdeps/powerpc/nofpu/libm-test-ulps-name new file mode 100644 index 0000000000..3ed2c6ec8a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/libm-test-ulps-name @@ -0,0 +1 @@ +PowerPC soft-float diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/sim-full.c b/REORG.TODO/sysdeps/powerpc/nofpu/sim-full.c new file mode 100644 index 0000000000..da48f3714e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/sim-full.c @@ -0,0 +1,57 @@ +/* Software floating-point exception handling emulation. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <signal.h> +#include "soft-fp.h" +#include "soft-supp.h" + +/* Thread-local to store sticky exceptions. */ +__thread int __sim_exceptions_thread __attribute__ ((nocommon)); +libc_hidden_data_def (__sim_exceptions_thread); + +/* By default, no exceptions should trap. */ +__thread int __sim_disabled_exceptions_thread = 0xffffffff; +libc_hidden_data_def (__sim_disabled_exceptions_thread); + +__thread int __sim_round_mode_thread __attribute__ ((nocommon)); +libc_hidden_data_def (__sim_round_mode_thread); + +#if SIM_GLOBAL_COMPAT +int __sim_exceptions_global __attribute__ ((nocommon)); +libc_hidden_data_def (__sim_exceptions_global); +SIM_COMPAT_SYMBOL (__sim_exceptions_global, __sim_exceptions); + +int __sim_disabled_exceptions_global = 0xffffffff; +libc_hidden_data_def (__sim_disabled_exceptions_global); +SIM_COMPAT_SYMBOL (__sim_disabled_exceptions_global, + __sim_disabled_exceptions); + +int __sim_round_mode_global __attribute__ ((nocommon)); +libc_hidden_data_def (__sim_round_mode_global); +SIM_COMPAT_SYMBOL (__sim_round_mode_global, __sim_round_mode); +#endif + +void +__simulate_exceptions (int x) +{ + __sim_exceptions_thread |= x; + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + if (x & ~__sim_disabled_exceptions_thread) + raise (SIGFPE); +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/soft-supp.h b/REORG.TODO/sysdeps/powerpc/nofpu/soft-supp.h new file mode 100644 index 0000000000..f66d9573fa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/soft-supp.h @@ -0,0 +1,63 @@ +/* Internal support stuff for complete soft float. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#if defined __NO_FPRS__ && !defined _SOFT_FLOAT + +# include <fenv_libc.h> + +#else + +# include <fenv.h> + +typedef union +{ + fenv_t fenv; + unsigned int l[2]; +} fenv_union_t; + +#endif + +extern __thread int __sim_exceptions_thread attribute_tls_model_ie; +libc_hidden_tls_proto (__sim_exceptions_thread, tls_model ("initial-exec")); +extern __thread int __sim_disabled_exceptions_thread attribute_tls_model_ie; +libc_hidden_tls_proto (__sim_disabled_exceptions_thread, + tls_model ("initial-exec")); +extern __thread int __sim_round_mode_thread attribute_tls_model_ie; +libc_hidden_tls_proto (__sim_round_mode_thread, tls_model ("initial-exec")); + +/* These variables were formerly global, so there are compat symbols + for global versions as well. */ + +#include <shlib-compat.h> +#define SIM_GLOBAL_COMPAT SHLIB_COMPAT (libc, GLIBC_2_3_2, GLIBC_2_19) +#if SIM_GLOBAL_COMPAT +extern int __sim_exceptions_global; +libc_hidden_proto (__sim_exceptions_global); +extern int __sim_disabled_exceptions_global ; +libc_hidden_proto (__sim_disabled_exceptions_global); +extern int __sim_round_mode_global; +libc_hidden_proto (__sim_round_mode_global); +# define SIM_COMPAT_SYMBOL(GLOBAL_NAME, NAME) \ + compat_symbol (libc, GLOBAL_NAME, NAME, GLIBC_2_3_2) +# define SIM_SET_GLOBAL(GLOBAL_VAR, THREAD_VAR) ((GLOBAL_VAR) = (THREAD_VAR)) +#else +# define SIM_SET_GLOBAL(GLOBAL_VAR, THREAD_VAR) ((void) 0) +#endif + +extern void __simulate_exceptions (int x) attribute_hidden; diff --git a/REORG.TODO/sysdeps/powerpc/novmx-longjmp.c b/REORG.TODO/sysdeps/powerpc/novmx-longjmp.c new file mode 100644 index 0000000000..b0020b728a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/novmx-longjmp.c @@ -0,0 +1,56 @@ +/* Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy of sysdeps/generic/longjmp.c modified for backward compatibility + with old non AltiVec/VMX longjmp. */ + +#include <bits/wordsize.h> +#include <shlib-compat.h> +#if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +# include <stddef.h> +# include <novmxsetjmp.h> +# include <signal.h> + + +/* Set the signal mask to the one specified in ENV, and jump + to the position specified in ENV, causing the setjmp + call there to return VAL, or 1 if VAL is 0. */ +void +__novmx__libc_siglongjmp (__novmx__sigjmp_buf env, int val) +{ + /* Perform any cleanups needed by the frames being unwound. */ + _longjmp_unwind (env, val); + + if (env[0].__mask_was_saved) + /* Restore the saved signal mask. */ + (void) __sigprocmask (SIG_SETMASK, &env[0].__saved_mask, + (sigset_t *) NULL); + + /* Call the machine-dependent function to restore machine state. */ + __novmx__longjmp (env[0].__jmpbuf, val ?: 1); +} + +strong_alias (__novmx__libc_siglongjmp, __novmx__libc_longjmp) +libc_hidden_def (__novmx__libc_longjmp) +weak_alias (__novmx__libc_siglongjmp, __novmx_longjmp) +weak_alias (__novmx__libc_siglongjmp, __novmxlongjmp) +weak_alias (__novmx__libc_siglongjmp, __novmxsiglongjmp) + +compat_symbol (libc, __novmx_longjmp, _longjmp, GLIBC_2_0); +compat_symbol (libc, __novmxlongjmp, longjmp, GLIBC_2_0); +compat_symbol (libc, __novmxsiglongjmp, siglongjmp, GLIBC_2_0); +#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)) */ diff --git a/REORG.TODO/sysdeps/powerpc/novmx-sigjmp.c b/REORG.TODO/sysdeps/powerpc/novmx-sigjmp.c new file mode 100644 index 0000000000..7d0ae59437 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/novmx-sigjmp.c @@ -0,0 +1,44 @@ +/* Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy of sysdeps/generic/sigjmp.c modified for backward compatibility + with old non AltiVec/VMX setjmp. */ + +#include <bits/wordsize.h> +#include <shlib-compat.h> +#if IS_IN (libc) && defined SHARED +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +# include <stddef.h> +# include <novmxsetjmp.h> +# include <signal.h> + +/* This function is called by the `sigsetjmp' macro + before doing a `__setjmp' on ENV[0].__jmpbuf. + Always return zero. */ + +int +__novmx__sigjmp_save (__novmx__sigjmp_buf env, int savemask) +{ + env[0].__mask_was_saved = (savemask && + __sigprocmask (SIG_BLOCK, (sigset_t *) NULL, + &env[0].__saved_mask) == 0); + + return 0; +} + +# endif /* SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) */ +#endif /* IS_IN (libc) && SHARED */ diff --git a/REORG.TODO/sysdeps/powerpc/novmxsetjmp.h b/REORG.TODO/sysdeps/powerpc/novmxsetjmp.h new file mode 100644 index 0000000000..aa76bf9d1e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/novmxsetjmp.h @@ -0,0 +1,132 @@ +/* Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Copied from setjmp/setjmp.h, powerpc/bits/setjmp.h and modified + appropriately to keep backward compatible with setjmp without + AltiVec/VMX support. + + This file is not exported and the interfaces are private to libc. */ + +#ifndef __NOVMX_SETJMP_H +#define __NOVMX_SETJMP_H 1 + +#include <bits/wordsize.h> + +/* The following definitions are needed by ASM implementations of the old + (novmx) __longjmp/__setjmp functions. */ + +# define JB_GPR1 0 /* Also known as the stack pointer */ +# define JB_GPR2 1 +# define JB_LR 2 /* The address we will return to */ +# if __WORDSIZE == 64 +# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18*2 words total. */ +# define JB_CR 21 /* Condition code registers. */ +# define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */ +# define JB_SIZE (40 * 8) +# else +# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18 in total. */ +# define JB_CR 21 /* Condition code registers. */ +# define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */ +# define JB_SIZE (58 * 4) +# endif + +#ifndef _ASM +/* The following definitions are needed by the novmx* implementations of + setjmp/longjmp/sigsetjmp/etc that wrapper __setjmp/__longjmp. */ + +# if __WORDSIZE == 64 +typedef long int __jmp_buf[40]; +# else +typedef long int __jmp_buf[58]; +# endif + +# include <bits/types/__sigset_t.h> + +/* Calling environment, plus possibly a saved signal mask. */ +typedef struct __novmx__jmp_buf_tag + { + /* NOTE: The machine-dependent definitions of `__sigsetjmp' + assume that a `jmp_buf' begins with a `__jmp_buf' and that + `__mask_was_saved' follows it. Do not move these members + or add others before it. */ + __jmp_buf __jmpbuf; /* Calling environment. */ + int __mask_was_saved; /* Saved the signal mask? */ + __sigset_t __saved_mask; /* Saved signal mask. */ + } __novmx__jmp_buf[1]; + + +/* Store the calling environment in ENV, also saving the signal mask. + Return 0. */ +extern int __novmxsetjmp (__novmx__jmp_buf __env); + +/* Store the calling environment in ENV, also saving the + signal mask if SAVEMASK is nonzero. Return 0. + This is the internal name for `sigsetjmp'. */ +extern int __novmx__sigsetjmp (struct __novmx__jmp_buf_tag __env[1], + int __savemask); + +/* Store the calling environment in ENV, not saving the signal mask. + Return 0. */ +extern int __novmx_setjmp (struct __novmx__jmp_buf_tag __env[1]); + +/* Jump to the environment saved in ENV, making the + `setjmp' call there return VAL, or 1 if VAL is 0. */ +extern void __novmxlongjmp (struct __novmx__jmp_buf_tag __env[1], int __val) + __attribute__ ((__noreturn__)); + +/* Same. Usually `_longjmp' is used with `_setjmp', which does not save + the signal mask. But it is how ENV was saved that determines whether + `longjmp' restores the mask; `_longjmp' is just an alias. */ +extern void __novmx_longjmp (struct __novmx__jmp_buf_tag __env[1], int __val) + __attribute__ ((__noreturn__)); + +/* Use the same type for `jmp_buf' and `sigjmp_buf'. + The `__mask_was_saved' flag determines whether + or not `longjmp' will restore the signal mask. */ +typedef struct __novmx__jmp_buf_tag __novmx__sigjmp_buf[1]; + +/* Jump to the environment saved in ENV, making the + sigsetjmp call there return VAL, or 1 if VAL is 0. + Restore the signal mask if that sigsetjmp call saved it. + This is just an alias `longjmp'. */ +extern void __novmxsiglongjmp (__novmx__sigjmp_buf __env, int __val) + __attribute__ ((__noreturn__)); + +/* Internal machine-dependent function to restore context sans signal mask. */ +extern void __novmx__longjmp (__jmp_buf __env, int __val) + __attribute__ ((__noreturn__)); + +/* Internal function to possibly save the current mask of blocked signals + in ENV, and always set the flag saying whether or not it was saved. + This is used by the machine-dependent definition of `__sigsetjmp'. + Always returns zero, for convenience. */ +extern int __novmx__sigjmp_save (__novmx__jmp_buf __env, int __savemask); + +extern void _longjmp_unwind (__novmx__jmp_buf env, int val); + +extern void __novmx__libc_siglongjmp (__novmx__sigjmp_buf env, int val) + __attribute__ ((noreturn)); + +extern void __novmx__libc_longjmp (__novmx__sigjmp_buf env, int val) + __attribute__ ((noreturn)); + +libc_hidden_proto (__novmx__libc_longjmp) +libc_hidden_proto (__novmx_setjmp) +libc_hidden_proto (__novmx__sigsetjmp) +#endif /* !_ASM */ + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/nptl/Makefile b/REORG.TODO/sysdeps/powerpc/nptl/Makefile new file mode 100644 index 0000000000..13fa630f2f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/Makefile @@ -0,0 +1,20 @@ +# Copyright (C) 2003-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +ifeq ($(subdir),csu) +gen-as-const-headers += tcb-offsets.sym +endif diff --git a/REORG.TODO/sysdeps/powerpc/nptl/bits/pthreadtypes-arch.h b/REORG.TODO/sysdeps/powerpc/nptl/bits/pthreadtypes-arch.h new file mode 100644 index 0000000000..f29119b794 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/bits/pthreadtypes-arch.h @@ -0,0 +1,79 @@ +/* Machine-specific pthread type layouts. PowerPC version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _BITS_PTHREADTYPES_ARCH_H +#define _BITS_PTHREADTYPES_ARCH_H 1 + +#include <bits/wordsize.h> + +#if __WORDSIZE == 64 +# define __SIZEOF_PTHREAD_MUTEX_T 40 +# define __SIZEOF_PTHREAD_ATTR_T 56 +# define __SIZEOF_PTHREAD_RWLOCK_T 56 +# define __SIZEOF_PTHREAD_BARRIER_T 32 +#else +# define __SIZEOF_PTHREAD_MUTEX_T 24 +# define __SIZEOF_PTHREAD_ATTR_T 36 +# define __SIZEOF_PTHREAD_RWLOCK_T 32 +# define __SIZEOF_PTHREAD_BARRIER_T 20 +#endif +#define __SIZEOF_PTHREAD_MUTEXATTR_T 4 +#define __SIZEOF_PTHREAD_COND_T 48 +#define __SIZEOF_PTHREAD_CONDATTR_T 4 +#define __SIZEOF_PTHREAD_RWLOCKATTR_T 8 +#define __SIZEOF_PTHREAD_BARRIERATTR_T 4 + +/* Definitions for internal mutex struct. */ +#define __PTHREAD_COMPAT_PADDING_MID +#define __PTHREAD_COMPAT_PADDING_END +#define __PTHREAD_MUTEX_LOCK_ELISION 1 + +#define __LOCK_ALIGNMENT +#define __ONCE_ALIGNMENT + +struct __pthread_rwlock_arch_t +{ + unsigned int __readers; + unsigned int __writers; + unsigned int __wrphase_futex; + unsigned int __writers_futex; + unsigned int __pad3; + unsigned int __pad4; +#if __WORDSIZE == 64 + int __cur_writer; + int __shared; + unsigned char __rwelision; + unsigned char __pad1[7]; + unsigned long int __pad2; + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. */ + unsigned int __flags; +# define __PTHREAD_RWLOCK_ELISION_EXTRA 0, {0, 0, 0, 0, 0, 0, 0 } +#else + unsigned char __rwelision; + unsigned char __pad2; + unsigned char __shared; + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. */ + unsigned char __flags; + int __cur_writer; +# define __PTHREAD_RWLOCK_ELISION_EXTRA 0 +#endif +}; + +#endif /* bits/pthreadtypes.h */ diff --git a/REORG.TODO/sysdeps/powerpc/nptl/elide.h b/REORG.TODO/sysdeps/powerpc/nptl/elide.h new file mode 100644 index 0000000000..1c42814b71 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/elide.h @@ -0,0 +1,125 @@ +/* elide.h: Generic lock elision support for powerpc. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef ELIDE_PPC_H +# define ELIDE_PPC_H + +#ifdef ENABLE_LOCK_ELISION +# include <htm.h> +# include <elision-conf.h> + +/* Get the new value of adapt_count according to the elision + configurations. Returns true if the system should retry again or false + otherwise. */ +static inline bool +__get_new_count (uint8_t *adapt_count, int attempt) +{ + /* A persistent failure indicates that a retry will probably + result in another failure. Use normal locking now and + for the next couple of calls. */ + if (_TEXASRU_FAILURE_PERSISTENT (__builtin_get_texasru ())) + { + if (__elision_aconf.skip_lock_internal_abort > 0) + *adapt_count = __elision_aconf.skip_lock_internal_abort; + return false; + } + /* Same logic as above, but for a number of temporary failures in a + a row. */ + else if (attempt <= 1 && __elision_aconf.skip_lock_out_of_tbegin_retries > 0 + && __elision_aconf.try_tbegin > 0) + *adapt_count = __elision_aconf.skip_lock_out_of_tbegin_retries; + return true; +} + +/* CONCURRENCY NOTES: + + The evaluation of the macro expression is_lock_free encompasses one or + more loads from memory locations that are concurrently modified by other + threads. For lock elision to work, this evaluation and the rest of the + critical section protected by the lock must be atomic because an + execution with lock elision must be equivalent to an execution in which + the lock would have been actually acquired and released. Therefore, we + evaluate is_lock_free inside of the transaction that represents the + critical section for which we want to use lock elision, which ensures + the atomicity that we require. */ + +/* Returns 0 if the lock defined by is_lock_free was elided. + ADAPT_COUNT is a per-lock state variable. */ +# define ELIDE_LOCK(adapt_count, is_lock_free) \ + ({ \ + int ret = 0; \ + if (adapt_count > 0) \ + (adapt_count)--; \ + else \ + for (int i = __elision_aconf.try_tbegin; i > 0; i--) \ + { \ + if (__libc_tbegin (0)) \ + { \ + if (is_lock_free) \ + { \ + ret = 1; \ + break; \ + } \ + __libc_tabort (_ABORT_LOCK_BUSY); \ + } \ + else \ + if (!__get_new_count (&adapt_count,i)) \ + break; \ + } \ + ret; \ + }) + +# define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) \ + ({ \ + int ret = 0; \ + if (__elision_aconf.try_tbegin > 0) \ + { \ + if (write) \ + __libc_tabort (_ABORT_NESTED_TRYLOCK); \ + ret = ELIDE_LOCK (adapt_count, is_lock_free); \ + } \ + ret; \ + }) + + +static inline bool +__elide_unlock (int is_lock_free) +{ + if (is_lock_free) + { + /* This code is expected to crash when trying to unlock a lock not + held by this thread. More information is available in the + __pthread_rwlock_unlock() implementation. */ + __libc_tend (0); + return true; + } + return false; +} + +# define ELIDE_UNLOCK(is_lock_free) \ + __elide_unlock (is_lock_free) + +# else + +# define ELIDE_LOCK(adapt_count, is_lock_free) 0 +# define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) 0 +# define ELIDE_UNLOCK(is_lock_free) 0 + +#endif /* ENABLE_LOCK_ELISION */ + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_lock.c b/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_lock.c new file mode 100644 index 0000000000..e377feb1cf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_lock.c @@ -0,0 +1,43 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "pthreadP.h" + +int +pthread_spin_lock (pthread_spinlock_t *lock) +{ + unsigned int __tmp; + + asm volatile ( + "1: lwarx %0,0,%1" MUTEX_HINT_ACQ "\n" + " cmpwi 0,%0,0\n" + " bne- 2f\n" + " stwcx. %2,0,%1\n" + " bne- 2f\n" + __ARCH_ACQ_INSTR "\n" + " .subsection 1\n" + "2: lwzx %0,0,%1\n" + " cmpwi 0,%0,0\n" + " bne 2b\n" + " b 1b\n" + " .previous" + : "=&r" (__tmp) + : "r" (lock), "r" (1) + : "cr0", "memory"); + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_trylock.c b/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_trylock.c new file mode 100644 index 0000000000..d81d984237 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_trylock.c @@ -0,0 +1,41 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include "pthreadP.h" + +int +pthread_spin_trylock (pthread_spinlock_t *lock) +{ + unsigned int old; + int err = EBUSY; + + asm ("1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" + " cmpwi 0,%0,0\n" + " bne 2f\n" + " stwcx. %3,0,%2\n" + " bne- 1b\n" + " li %1,0\n" + __ARCH_ACQ_INSTR "\n" + "2: " + : "=&r" (old), "=&r" (err) + : "r" (lock), "r" (1), "1" (err) + : "cr0", "memory"); + + return err; +} diff --git a/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_unlock.c b/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_unlock.c new file mode 100644 index 0000000000..fa30a82491 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_unlock.c @@ -0,0 +1,27 @@ +/* pthread_spin_unlock -- unlock a spin lock. PowerPC version. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "pthreadP.h" +#include <lowlevellock.h> + +int +pthread_spin_unlock (pthread_spinlock_t *lock) +{ + atomic_store_release (lock, 0); + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/nptl/pthreaddef.h b/REORG.TODO/sysdeps/powerpc/nptl/pthreaddef.h new file mode 100644 index 0000000000..e3e407a4c4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/pthreaddef.h @@ -0,0 +1,33 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Default stack size. */ +#define ARCH_STACK_DEFAULT_SIZE (4 * 1024 * 1024) + +/* Required stack pointer alignment at beginning. The ABI requires 16 + bytes (for both 32-bit and 64-bit PowerPC). */ +#define STACK_ALIGN 16 + +/* Minimal stack size after allocating thread descriptor and guard size. */ +#define MINIMAL_REST_STACK 4096 + +/* Alignment requirement for TCB. */ +#define TCB_ALIGNMENT 16 + + +/* Location of current stack frame. */ +#define CURRENT_STACK_FRAME __builtin_frame_address (0) diff --git a/REORG.TODO/sysdeps/powerpc/nptl/tcb-offsets.sym b/REORG.TODO/sysdeps/powerpc/nptl/tcb-offsets.sym new file mode 100644 index 0000000000..7c9fd33562 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/tcb-offsets.sym @@ -0,0 +1,32 @@ +#include <sysdep.h> +#include <tls.h> +#include <kernel-features.h> + +-- + +-- Abuse tls.h macros to derive offsets relative to the thread register. +# undef __thread_register +# define __thread_register ((void *) 0) +# define thread_offsetof(mem) ((ptrdiff_t) THREAD_SELF + offsetof (struct pthread, mem)) + + +#if TLS_MULTIPLE_THREADS_IN_TCB +MULTIPLE_THREADS_OFFSET thread_offsetof (header.multiple_threads) +#endif +TID thread_offsetof (tid) +POINTER_GUARD (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +TAR_SAVE (offsetof (tcbhead_t, tar_save) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +DSO_SLOT1 (offsetof (tcbhead_t, dso_slot1) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +DSO_SLOT2 (offsetof (tcbhead_t, dso_slot2) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +#ifdef __powerpc64__ +TCB_AT_PLATFORM (offsetof (tcbhead_t, at_platform) - TLS_TCB_OFFSET - sizeof(tcbhead_t)) +#endif +TM_CAPABLE (offsetof (tcbhead_t, tm_capable) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +#ifndef __powerpc64__ +TCB_AT_PLATFORM (offsetof (tcbhead_t, at_platform) - TLS_TCB_OFFSET - sizeof(tcbhead_t)) +PADDING (offsetof (tcbhead_t, padding) - TLS_TCB_OFFSET - sizeof(tcbhead_t)) +#endif +TCB_HWCAP (offsetof (tcbhead_t, hwcap) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +#ifndef __ASSUME_PRIVATE_FUTEX +PRIVATE_FUTEX_OFFSET thread_offsetof (header.private_futex) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/nptl/tls.h b/REORG.TODO/sysdeps/powerpc/nptl/tls.h new file mode 100644 index 0000000000..7556e7c8b8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/tls.h @@ -0,0 +1,263 @@ +/* Definition for thread-local data handling. NPTL/PowerPC version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _TLS_H +#define _TLS_H 1 + +# include <dl-sysdep.h> + +#ifndef __ASSEMBLER__ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <dl-dtv.h> + +#else /* __ASSEMBLER__ */ +# include <tcb-offsets.h> +#endif /* __ASSEMBLER__ */ + + +#ifndef __ASSEMBLER__ + +# include <hwcapinfo.h> + +/* Get system call information. */ +# include <sysdep.h> + +/* The TP points to the start of the thread blocks. */ +# define TLS_DTV_AT_TP 1 +# define TLS_TCB_AT_TP 0 + +/* We use the multiple_threads field in the pthread struct */ +#define TLS_MULTIPLE_THREADS_IN_TCB 1 + +/* Get the thread descriptor definition. */ +# include <nptl/descr.h> + + +/* The stack_guard is accessed directly by GCC -fstack-protector code, + so it is a part of public ABI. The dtv and pointer_guard fields + are private. */ +typedef struct +{ + /* Reservation for HWCAP data. To be accessed by GCC in + __builtin_cpu_supports(), so it is a part of public ABI. */ + uint64_t hwcap; + /* Reservation for AT_PLATFORM data. To be accessed by GCC in + __builtin_cpu_is(), so it is a part of public ABI. Since there + are different ABIs for 32 and 64 bit, we put this field in a + previously empty padding space for powerpc64. */ +#ifndef __powerpc64__ + /* Padding to maintain alignment. */ + uint32_t padding; + uint32_t at_platform; +#endif + /* Indicate if HTM capable (ISA 2.07). */ + uint32_t tm_capable; + /* Reservation for AT_PLATFORM data - powerpc64. */ +#ifdef __powerpc64__ + uint32_t at_platform; +#endif + /* Reservation for Dynamic System Optimizer ABI. */ + uintptr_t dso_slot2; + uintptr_t dso_slot1; + /* Reservation for tar register (ISA 2.07). */ + uintptr_t tar_save; + /* GCC split stack support. */ + void *__private_ss; + /* Reservation for the Event-Based Branching ABI. */ + uintptr_t ebb_handler; + uintptr_t ebb_ctx_pointer; + uintptr_t ebb_reserved1; + uintptr_t ebb_reserved2; + uintptr_t pointer_guard; + uintptr_t stack_guard; + dtv_t *dtv; +} tcbhead_t; + +/* This is the size of the initial TCB. */ +# define TLS_INIT_TCB_SIZE 0 + +/* Alignment requirements for the initial TCB. */ +# define TLS_INIT_TCB_ALIGN __alignof__ (struct pthread) + +/* This is the size of the TCB. */ +# define TLS_TCB_SIZE 0 + +/* Alignment requirements for the TCB. */ +# define TLS_TCB_ALIGN __alignof__ (struct pthread) + +/* This is the size we need before TCB. */ +# define TLS_PRE_TCB_SIZE \ + (sizeof (struct pthread) \ + + ((sizeof (tcbhead_t) + TLS_TCB_ALIGN - 1) & ~(TLS_TCB_ALIGN - 1))) + +# ifndef __powerpc64__ +/* Register r2 (tp) is reserved by the ABI as "thread pointer". */ +register void *__thread_register __asm__ ("r2"); +# define PT_THREAD_POINTER PT_R2 +# else +/* Register r13 (tp) is reserved by the ABI as "thread pointer". */ +register void *__thread_register __asm__ ("r13"); +# define PT_THREAD_POINTER PT_R13 +# endif + +/* The following assumes that TP (R2 or R13) points to the end of the + TCB + 0x7000 (per the ABI). This implies that TCB address is + TP - 0x7000. As we define TLS_DTV_AT_TP we can + assume that the pthread struct is allocated immediately ahead of the + TCB. This implies that the pthread_descr address is + TP - (TLS_PRE_TCB_SIZE + 0x7000). */ +# define TLS_TCB_OFFSET 0x7000 + +/* Install the dtv pointer. The pointer passed is to the element with + index -1 which contain the length. */ +# define INSTALL_DTV(tcbp, dtvp) \ + ((tcbhead_t *) (tcbp))[-1].dtv = dtvp + 1 + +/* Install new dtv for current thread. */ +# define INSTALL_NEW_DTV(dtv) (THREAD_DTV() = (dtv)) + +/* Return dtv of given thread descriptor. */ +# define GET_DTV(tcbp) (((tcbhead_t *) (tcbp))[-1].dtv) + +/* Code to initially initialize the thread pointer. This might need + special attention since 'errno' is not yet available and if the + operation can cause a failure 'errno' must not be touched. */ +# define TLS_INIT_TP(tcbp) \ + ({ \ + __thread_register = (void *) (tcbp) + TLS_TCB_OFFSET; \ + THREAD_SET_TM_CAPABLE (__tcb_hwcap & PPC_FEATURE2_HAS_HTM ? 1 : 0); \ + THREAD_SET_HWCAP (__tcb_hwcap); \ + THREAD_SET_AT_PLATFORM (__tcb_platform); \ + NULL; \ + }) + +/* Value passed to 'clone' for initialization of the thread register. */ +# define TLS_DEFINE_INIT_TP(tp, pd) \ + void *tp = (void *) (pd) + TLS_TCB_OFFSET + TLS_PRE_TCB_SIZE; \ + (((tcbhead_t *) ((char *) tp - TLS_TCB_OFFSET))[-1].tm_capable) = \ + THREAD_GET_TM_CAPABLE (); \ + (((tcbhead_t *) ((char *) tp - TLS_TCB_OFFSET))[-1].hwcap) = \ + THREAD_GET_HWCAP (); \ + (((tcbhead_t *) ((char *) tp - TLS_TCB_OFFSET))[-1].at_platform) = \ + THREAD_GET_AT_PLATFORM (); + +/* Return the address of the dtv for the current thread. */ +# define THREAD_DTV() \ + (((tcbhead_t *) (__thread_register - TLS_TCB_OFFSET))[-1].dtv) + +/* Return the thread descriptor for the current thread. */ +# define THREAD_SELF \ + ((struct pthread *) (__thread_register \ + - TLS_TCB_OFFSET - TLS_PRE_TCB_SIZE)) + +/* Magic for libthread_db to know how to do THREAD_SELF. */ +# define DB_THREAD_SELF \ + REGISTER (32, 32, PT_THREAD_POINTER * 4, \ + - TLS_TCB_OFFSET - TLS_PRE_TCB_SIZE) \ + REGISTER (64, 64, PT_THREAD_POINTER * 8, \ + - TLS_TCB_OFFSET - TLS_PRE_TCB_SIZE) + +/* Read member of the thread descriptor directly. */ +# define THREAD_GETMEM(descr, member) ((void)(descr), (THREAD_SELF)->member) + +/* Same as THREAD_GETMEM, but the member offset can be non-constant. */ +# define THREAD_GETMEM_NC(descr, member, idx) \ + ((void)(descr), (THREAD_SELF)->member[idx]) + +/* Set member of the thread descriptor directly. */ +# define THREAD_SETMEM(descr, member, value) \ + ((void)(descr), (THREAD_SELF)->member = (value)) + +/* Same as THREAD_SETMEM, but the member offset can be non-constant. */ +# define THREAD_SETMEM_NC(descr, member, idx, value) \ + ((void)(descr), (THREAD_SELF)->member[idx] = (value)) + +/* Set the stack guard field in TCB head. */ +# define THREAD_SET_STACK_GUARD(value) \ + (((tcbhead_t *) ((char *) __thread_register \ + - TLS_TCB_OFFSET))[-1].stack_guard = (value)) +# define THREAD_COPY_STACK_GUARD(descr) \ + (((tcbhead_t *) ((char *) (descr) \ + + TLS_PRE_TCB_SIZE))[-1].stack_guard \ + = ((tcbhead_t *) ((char *) __thread_register \ + - TLS_TCB_OFFSET))[-1].stack_guard) + +/* Set the stack guard field in TCB head. */ +# define THREAD_GET_POINTER_GUARD() \ + (((tcbhead_t *) ((char *) __thread_register \ + - TLS_TCB_OFFSET))[-1].pointer_guard) +# define THREAD_SET_POINTER_GUARD(value) \ + (THREAD_GET_POINTER_GUARD () = (value)) +# define THREAD_COPY_POINTER_GUARD(descr) \ + (((tcbhead_t *) ((char *) (descr) \ + + TLS_PRE_TCB_SIZE))[-1].pointer_guard \ + = THREAD_GET_POINTER_GUARD()) + +/* tm_capable field in TCB head. */ +# define THREAD_GET_TM_CAPABLE() \ + (((tcbhead_t *) ((char *) __thread_register \ + - TLS_TCB_OFFSET))[-1].tm_capable) +# define THREAD_SET_TM_CAPABLE(value) \ + (THREAD_GET_TM_CAPABLE () = (value)) + +/* hwcap field in TCB head. */ +# define THREAD_GET_HWCAP() \ + (((tcbhead_t *) ((char *) __thread_register \ + - TLS_TCB_OFFSET))[-1].hwcap) +# define THREAD_SET_HWCAP(value) \ + (THREAD_GET_HWCAP () = (value)) + +/* at_platform field in TCB head. */ +# define THREAD_GET_AT_PLATFORM() \ + (((tcbhead_t *) ((char *) __thread_register \ + - TLS_TCB_OFFSET))[-1].at_platform) +# define THREAD_SET_AT_PLATFORM(value) \ + (THREAD_GET_AT_PLATFORM () = (value)) + +/* l_tls_offset == 0 is perfectly valid on PPC, so we have to use some + different value to mean unset l_tls_offset. */ +# define NO_TLS_OFFSET -1 + +/* Get and set the global scope generation counter in struct pthread. */ +#define THREAD_GSCOPE_FLAG_UNUSED 0 +#define THREAD_GSCOPE_FLAG_USED 1 +#define THREAD_GSCOPE_FLAG_WAIT 2 +#define THREAD_GSCOPE_RESET_FLAG() \ + do \ + { int __res \ + = atomic_exchange_rel (&THREAD_SELF->header.gscope_flag, \ + THREAD_GSCOPE_FLAG_UNUSED); \ + if (__res == THREAD_GSCOPE_FLAG_WAIT) \ + lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \ + } \ + while (0) +#define THREAD_GSCOPE_SET_FLAG() \ + do \ + { \ + THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED; \ + atomic_write_barrier (); \ + } \ + while (0) +#define THREAD_GSCOPE_WAIT() \ + GL(dl_wait_lookup_done) () + +#endif /* __ASSEMBLER__ */ + +#endif /* tls.h */ diff --git a/REORG.TODO/sysdeps/powerpc/power4/fpu/Makefile b/REORG.TODO/sysdeps/powerpc/power4/fpu/Makefile new file mode 100644 index 0000000000..e17d32f30e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power4/fpu/Makefile @@ -0,0 +1,7 @@ +# Makefile fragment for POWER4/5/5+ with FPU. + +ifeq ($(subdir),math) +CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops +CPPFLAGS-slowpow.c += -DUSE_LONG_DOUBLE_FOR_MP=1 +CPPFLAGS-slowexp.c += -DUSE_LONG_DOUBLE_FOR_MP=1 +endif diff --git a/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa-arch.h b/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa-arch.h new file mode 100644 index 0000000000..4754c1b0f9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa-arch.h @@ -0,0 +1,56 @@ +/* Overridable constants and operations. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +typedef double mantissa_t; +typedef double mantissa_store_t; + +#define TWOPOW(i) (0x1.0p##i) + +#define RADIX TWOPOW (24) /* 2^24 */ +#define CUTTER TWOPOW (76) /* 2^76 */ +#define RADIXI 0x1.0p-24 /* 2^-24 */ +#define TWO52 TWOPOW (52) /* 2^52 */ + +/* Divide D by RADIX and put the remainder in R. */ +#define DIV_RADIX(d,r) \ + ({ \ + double u = ((d) + CUTTER) - CUTTER; \ + if (u > (d)) \ + u -= RADIX; \ + r = (d) - u; \ + (d) = u * RADIXI; \ + }) + +/* Put the integer component of a double X in R and retain the fraction in + X. */ +#define INTEGER_OF(x, r) \ + ({ \ + double u = ((x) + TWO52) - TWO52; \ + if (u > (x)) \ + u -= 1; \ + (r) = u; \ + (x) -= u; \ + }) + +/* Align IN down to a multiple of F, where F is a power of two. */ +#define ALIGN_DOWN_TO(in, f) \ + ({ \ + double factor = f * TWO52; \ + double u = (in + factor) - factor; \ + if (u > in) \ + u -= f; \ + u; \ + }) diff --git a/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa.c b/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa.c new file mode 100644 index 0000000000..0a0f7175b4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa.c @@ -0,0 +1,214 @@ + +/* + * IBM Accurate Mathematical Library + * written by International Business Machines Corp. + * Copyright (C) 2001-2017 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* Define __mul and __sqr and use the rest from generic code. */ +#define NO__MUL +#define NO__SQR + +#include <sysdeps/ieee754/dbl-64/mpa.c> + +/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X + and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P + digits. In case P > 3 the error is bounded by 1.001 ULP. */ +void +__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ + long i, i1, i2, j, k, k2; + long p2 = p; + double u, zk, zk2; + + /* Is z=0? */ + if (__glibc_unlikely (X[0] * Y[0] == 0)) + { + Z[0] = 0; + return; + } + + /* Multiply, add and carry */ + k2 = (p2 < 3) ? p2 + p2 : p2 + 3; + zk = Z[k2] = 0; + for (k = k2; k > 1;) + { + if (k > p2) + { + i1 = k - p2; + i2 = p2 + 1; + } + else + { + i1 = 1; + i2 = k; + } +#if 1 + /* Rearrange this inner loop to allow the fmadd instructions to be + independent and execute in parallel on processors that have + dual symmetrical FP pipelines. */ + if (i1 < (i2 - 1)) + { + /* Make sure we have at least 2 iterations. */ + if (((i2 - i1) & 1L) == 1L) + { + /* Handle the odd iterations case. */ + zk2 = x->d[i2 - 1] * y->d[i1]; + } + else + zk2 = 0.0; + /* Do two multiply/adds per loop iteration, using independent + accumulators; zk and zk2. */ + for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2) + { + zk += x->d[i] * y->d[j]; + zk2 += x->d[i + 1] * y->d[j - 1]; + } + zk += zk2; /* Final sum. */ + } + else + { + /* Special case when iterations is 1. */ + zk += x->d[i1] * y->d[i1]; + } +#else + /* The original code. */ + for (i = i1, j = i2 - 1; i < i2; i++, j--) + zk += X[i] * Y[j]; +#endif + + u = (zk + CUTTER) - CUTTER; + if (u > zk) + u -= RADIX; + Z[k] = zk - u; + zk = u * RADIXI; + --k; + } + Z[k] = zk; + + int e = EX + EY; + /* Is there a carry beyond the most significant digit? */ + if (Z[1] == 0) + { + for (i = 1; i <= p2; i++) + Z[i] = Z[i + 1]; + e--; + } + + EZ = e; + Z[0] = X[0] * Y[0]; +} + +/* Square *X and store result in *Y. X and Y may not overlap. For P in + [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the + error is bounded by 1.001 ULP. This is a faster special case of + multiplication. */ +void +__sqr (const mp_no *x, mp_no *y, int p) +{ + long i, j, k, ip; + double u, yk; + + /* Is z=0? */ + if (__glibc_unlikely (X[0] == 0)) + { + Y[0] = 0; + return; + } + + /* We need not iterate through all X's since it's pointless to + multiply zeroes. */ + for (ip = p; ip > 0; ip--) + if (X[ip] != 0) + break; + + k = (__glibc_unlikely (p < 3)) ? p + p : p + 3; + + while (k > 2 * ip + 1) + Y[k--] = 0; + + yk = 0; + + while (k > p) + { + double yk2 = 0.0; + long lim = k / 2; + + if (k % 2 == 0) + { + yk += X[lim] * X[lim]; + lim--; + } + + /* In __mul, this loop (and the one within the next while loop) run + between a range to calculate the mantissa as follows: + + Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1] + + X[n] * Y[k] + + For X == Y, we can get away with summing halfway and doubling the + result. For cases where the range size is even, the mid-point needs + to be added separately (above). */ + for (i = k - p, j = p; i <= lim; i++, j--) + yk2 += X[i] * X[j]; + + yk += 2.0 * yk2; + + u = (yk + CUTTER) - CUTTER; + if (u > yk) + u -= RADIX; + Y[k--] = yk - u; + yk = u * RADIXI; + } + + while (k > 1) + { + double yk2 = 0.0; + long lim = k / 2; + + if (k % 2 == 0) + { + yk += X[lim] * X[lim]; + lim--; + } + + /* Likewise for this loop. */ + for (i = 1, j = k - 1; i <= lim; i++, j--) + yk2 += X[i] * X[j]; + + yk += 2.0 * yk2; + + u = (yk + CUTTER) - CUTTER; + if (u > yk) + u -= RADIX; + Y[k--] = yk - u; + yk = u * RADIXI; + } + Y[k] = yk; + + /* Squares are always positive. */ + Y[0] = 1.0; + + int e = EX * 2; + /* Is there a carry beyond the most significant digit? */ + if (__glibc_unlikely (Y[1] == 0)) + { + for (i = 1; i <= p; i++) + Y[i] = Y[i + 1]; + e--; + } + EY = e; +} diff --git a/REORG.TODO/sysdeps/powerpc/power4/wordcopy.c b/REORG.TODO/sysdeps/powerpc/power4/wordcopy.c new file mode 100644 index 0000000000..2648e25c58 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power4/wordcopy.c @@ -0,0 +1,212 @@ +/* _memcopy.c -- subroutines for memory copy functions. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund (tege@sics.se). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */ + +#include <stddef.h> +#include <memcopy.h> + +/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to + block beginning at DSTP with LEN `op_t' words (not LEN bytes!). + Both SRCP and DSTP should be aligned for memory operations on `op_t's. */ + +#ifndef WORDCOPY_FWD_ALIGNED +# define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned +#endif + +void +WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + if (len & 1) + { + ((op_t *) dstp)[0] = ((op_t *) srcp)[0]; + + if (len == 1) + return; + srcp += OPSIZ; + dstp += OPSIZ; + len -= 1; + } + + do + { + a0 = ((op_t *) srcp)[0]; + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[0] = a0; + ((op_t *) dstp)[1] = a1; + + srcp += 2 * OPSIZ; + dstp += 2 * OPSIZ; + len -= 2; + } + while (len != 0); +} + +/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to + block beginning at DSTP with LEN `op_t' words (not LEN bytes!). + DSTP should be aligned for memory operations on `op_t's, but SRCP must + *not* be aligned. */ + +#ifndef WORDCOPY_FWD_DEST_ALIGNED +# define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned +#endif + +void +WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1, a2; + int sh_1, sh_2; + + /* Calculate how to shift a word read at the memory operation + aligned srcp to make it aligned for copy. */ + + sh_1 = 8 * (srcp % OPSIZ); + sh_2 = 8 * OPSIZ - sh_1; + + /* Make SRCP aligned by rounding it down to the beginning of the `op_t' + it points in the middle of. */ + srcp &= -OPSIZ; + a0 = ((op_t *) srcp)[0]; + + if (len & 1) + { + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2); + + if (len == 1) + return; + + a0 = a1; + srcp += OPSIZ; + dstp += OPSIZ; + len -= 1; + } + + do + { + a1 = ((op_t *) srcp)[1]; + a2 = ((op_t *) srcp)[2]; + ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2); + ((op_t *) dstp)[1] = MERGE (a1, sh_1, a2, sh_2); + a0 = a2; + + srcp += 2 * OPSIZ; + dstp += 2 * OPSIZ; + len -= 2; + } + while (len != 0); +} + +/* _wordcopy_bwd_aligned -- Copy block finishing right before + SRCP to block finishing right before DSTP with LEN `op_t' words + (not LEN bytes!). Both SRCP and DSTP should be aligned for memory + operations on `op_t's. */ + +#ifndef WORDCOPY_BWD_ALIGNED +# define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned +#endif + +void +WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + if (len & 1) + { + srcp -= OPSIZ; + dstp -= OPSIZ; + ((op_t *) dstp)[0] = ((op_t *) srcp)[0]; + + if (len == 1) + return; + len -= 1; + } + + do + { + srcp -= 2 * OPSIZ; + dstp -= 2 * OPSIZ; + + a1 = ((op_t *) srcp)[1]; + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[1] = a1; + ((op_t *) dstp)[0] = a0; + + len -= 2; + } + while (len != 0); +} + +/* _wordcopy_bwd_dest_aligned -- Copy block finishing right + before SRCP to block finishing right before DSTP with LEN `op_t' + words (not LEN bytes!). DSTP should be aligned for memory + operations on `op_t', but SRCP must *not* be aligned. */ + +#ifndef WORDCOPY_BWD_DEST_ALIGNED +# define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned +#endif + +void +WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1, a2; + int sh_1, sh_2; + + /* Calculate how to shift a word read at the memory operation + aligned srcp to make it aligned for copy. */ + + sh_1 = 8 * (srcp % OPSIZ); + sh_2 = 8 * OPSIZ - sh_1; + + /* Make srcp aligned by rounding it down to the beginning of the op_t + it points in the middle of. */ + srcp &= -OPSIZ; + a2 = ((op_t *) srcp)[0]; + + if (len & 1) + { + srcp -= OPSIZ; + dstp -= OPSIZ; + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2); + + if (len == 1) + return; + + a2 = a1; + len -= 1; + } + + do + { + srcp -= 2 * OPSIZ; + dstp -= 2 * OPSIZ; + + a1 = ((op_t *) srcp)[1]; + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[1] = MERGE (a1, sh_1, a2, sh_2); + ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2); + a2 = a0; + + len -= 2; + } + while (len != 0); +} diff --git a/REORG.TODO/sysdeps/powerpc/power5+/fpu/s_modf.c b/REORG.TODO/sysdeps/powerpc/power5+/fpu/s_modf.c new file mode 100644 index 0000000000..ee0c62874b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power5+/fpu/s_modf.c @@ -0,0 +1,58 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <math_ldbl_opt.h> + +double +__modf (double x, double *iptr) +{ + if (__builtin_isinf (x)) + { + *iptr = x; + return __copysign (0.0, x); + } + else if (__builtin_isnan (x)) + { + *iptr = NAN; + return NAN; + } + + if (x >= 0.0) + { + *iptr = __floor (x); + return __copysign (x - *iptr, x); + } + else + { + *iptr = __ceil (x); + return __copysign (x - *iptr, x); + } +} +weak_alias (__modf, modf) +#ifdef NO_LONG_DOUBLE +strong_alias (__modf, __modfl) +weak_alias (__modf, modfl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __modf, modfl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __modf, modfl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/power5+/fpu/s_modff.c b/REORG.TODO/sysdeps/powerpc/power5+/fpu/s_modff.c new file mode 100644 index 0000000000..35bed46fa7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power5+/fpu/s_modff.c @@ -0,0 +1,46 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> + +float +__modff (float x, float *iptr) +{ + if (__builtin_isinff (x)) + { + *iptr = x; + return __copysignf (0.0, x); + } + else if (__builtin_isnanf (x)) + { + *iptr = NAN; + return NAN; + } + + if (x >= 0.0) + { + *iptr = __floorf (x); + return __copysignf (x - *iptr, x); + } + else + { + *iptr = __ceilf (x); + return __copysignf (x - *iptr, x); + } +} +weak_alias (__modff, modff) diff --git a/REORG.TODO/sysdeps/powerpc/power6/wcschr.c b/REORG.TODO/sysdeps/powerpc/power6/wcschr.c new file mode 100644 index 0000000000..a416f31f7c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power6/wcschr.c @@ -0,0 +1,96 @@ +/* wcschr.c - Wide Character Search for POWER6+. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#ifndef WCSCHR +# define WCSCHR __wcschr +# define DEFAULT_WCSCHR +#endif + +/* Find the first occurrence of WC in WCS. */ +wchar_t * +WCSCHR (const wchar_t *wcs, const wchar_t wc) +{ + const wchar_t *wcs2 = wcs + 1; + + if (*wcs == wc) + return (wchar_t *) wcs; + if (*wcs == L'\0') + return NULL; + + do + { + wcs += 2; + + if (*wcs2 == wc) + return (wchar_t *) wcs2; + if (*wcs2 == L'\0') + return NULL; + wcs2 += 2; + + if (*wcs == wc) + return (wchar_t *) wcs; + if (*wcs == L'\0') + return NULL; + wcs += 2; + + if (*wcs2 == wc) + return (wchar_t *) wcs2; + if (*wcs2 == L'\0') + return NULL; + wcs2 += 2; + + if (*wcs == wc) + return (wchar_t *) wcs; + if (*wcs == L'\0') + return NULL; + wcs += 2; + + if (*wcs2 == wc) + return (wchar_t *) wcs2; + if (*wcs2 == L'\0') + return NULL; + wcs2 += 2; + + if (*wcs == wc) + return (wchar_t *) wcs; + if (*wcs == L'\0') + return NULL; + wcs += 2; + + if (*wcs2 == wc) + return (wchar_t *) wcs2; + if (*wcs2 == L'\0') + return NULL; + wcs2 += 2; + + if (*wcs == wc) + return (wchar_t *) wcs; + } + while (*wcs != L'\0'); + + return NULL; +} +#ifdef DEFAULT_WCSCHR +libc_hidden_def (__wcschr) +weak_alias (__wcschr, wcschr) +libc_hidden_weak (wcschr) +#else +libc_hidden_def (wcschr) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/power6/wcscpy.c b/REORG.TODO/sysdeps/powerpc/power6/wcscpy.c new file mode 100644 index 0000000000..e6de240746 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power6/wcscpy.c @@ -0,0 +1,105 @@ +/* wcscpy.c - Wide Character Copy for POWER6+. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> +#include <wchar.h> + +#ifndef WCSCPY +# define WCSCPY wcscpy +#endif + +/* Copy SRC to DEST. */ +wchar_t * +WCSCPY (wchar_t *dest, const wchar_t *src) +{ + wint_t c,d; + wchar_t *wcp, *wcp2; + + if (__alignof__ (wchar_t) >= sizeof (wchar_t)) + { + const ptrdiff_t off = dest - src; + + wcp = (wchar_t *) src; + wcp2 = wcp + 1 ; + + do + { + d = *wcp; + wcp[off] = d; + if (d == L'\0') + return dest; + wcp += 2; + + c = *wcp2; + wcp2[off] = c; + if (c == L'\0') + return dest; + wcp2 += 2; + + d = *wcp; + wcp[off] = d; + if (d == L'\0') + return dest; + wcp += 2; + + c = *wcp2; + wcp2[off] = c; + if (c == L'\0') + return dest; + wcp2 += 2; + + d = *wcp; + wcp[off] = d; + if (d == L'\0') + return dest; + wcp += 2; + + c = *wcp2; + wcp2[off] = c; + if (c == L'\0') + return dest; + wcp2 += 2; + + d = *wcp; + wcp[off] = d; + if (d == L'\0') + return dest; + wcp += 2; + + c = *wcp2; + wcp2[off] = c; + if (c == L'\0') + return dest; + wcp2 += 2; + } + while (c != L'\0'); + + } + else + { + wcp = dest; + + do + { + c = *src++; + *wcp++ = c; + } + while (c != L'\0'); + } + return dest; +} diff --git a/REORG.TODO/sysdeps/powerpc/power6/wcsrchr.c b/REORG.TODO/sysdeps/powerpc/power6/wcsrchr.c new file mode 100644 index 0000000000..f9fb399b31 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power6/wcsrchr.c @@ -0,0 +1,89 @@ +/* wcsrchr.c - Wide Character Reverse Search for POWER6+. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#ifndef WCSRCHR +# define WCSRCHR wcsrchr +#endif + +/* Find the last occurrence of WC in WCS. */ +wchar_t * +WCSRCHR (const wchar_t *wcs, const wchar_t wc) +{ + const wchar_t *wcs2 = wcs + 1; + const wchar_t *retval = NULL; + + if (*wcs == wc) + retval = wcs; + + if (*wcs == L'\0') return (wchar_t *) retval; + + do + { + wcs+=2; + + if (*wcs2 == wc) + retval = wcs2; + if (*wcs2 == L'\0') + return (wchar_t *) retval; + wcs2+=2; + + if (*wcs == wc) + retval = wcs; + if (*wcs == L'\0') + return (wchar_t *) retval; + wcs+=2; + + if (*wcs2 == wc) + retval = wcs2; + if (*wcs2 == L'\0') + return (wchar_t *) retval; + wcs2+=2; + + if (*wcs == wc) + retval = wcs; + if (*wcs == L'\0') + return (wchar_t *) retval; + wcs+=2; + + if (*wcs2 == wc) + retval = wcs2; + if (*wcs2 == L'\0') + return (wchar_t *) retval; + wcs2+=2; + + if (*wcs == wc) + retval = wcs; + if (*wcs == L'\0') + return (wchar_t *) retval; + wcs+=2; + + if (*wcs2 == wc) + retval = wcs2; + if (*wcs2 == L'\0') + return (wchar_t *) retval; + wcs2+=2; + + if (*wcs == wc) + retval = wcs; + } + while (*wcs != L'\0'); + + return (wchar_t *) retval; +} diff --git a/REORG.TODO/sysdeps/powerpc/power6/wordcopy.c b/REORG.TODO/sysdeps/powerpc/power6/wordcopy.c new file mode 100644 index 0000000000..545a67bf5d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power6/wordcopy.c @@ -0,0 +1,221 @@ +/* _memcopy.c -- subroutines for memory copy functions. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund (tege@sics.se). + Updated for POWER6 by Steven Munroe (sjmunroe@us.ibm.com). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */ + +#include <stddef.h> +#include <memcopy.h> + +/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to + block beginning at DSTP with LEN `op_t' words (not LEN bytes!). + Both SRCP and DSTP should be aligned for memory operations on `op_t's. */ + +#ifndef WORDCOPY_FWD_ALIGNED +# define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned +#endif + +void +WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + if (len & 1) + { + ((op_t *) dstp)[0] = ((op_t *) srcp)[0]; + + if (len == 1) + return; + srcp += OPSIZ; + dstp += OPSIZ; + len -= 1; + } + + do + { + a0 = ((op_t *) srcp)[0]; + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[0] = a0; + ((op_t *) dstp)[1] = a1; + + srcp += 2 * OPSIZ; + dstp += 2 * OPSIZ; + len -= 2; + } + while (len != 0); +} + +/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to + block beginning at DSTP with LEN `op_t' words (not LEN bytes!). + DSTP should be aligned for memory operations on `op_t's, but SRCP must + *not* be aligned. */ + +#define fwd_align_merge(align) \ + do \ + { \ + a1 = ((op_t *) srcp)[1]; \ + a2 = ((op_t *) srcp)[2]; \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (__WORDSIZE-align*8)); \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (__WORDSIZE-align*8)); \ + a0 = a2; \ + srcp += 2 * OPSIZ; \ + dstp += 2 * OPSIZ; \ + len -= 2; \ + } \ + while (len != 0) + +#ifndef WORDCOPY_FWD_DEST_ALIGNED +# define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned +#endif + +void +WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1, a2; + int sh_1, sh_2; + int align; + + /* Calculate how to shift a word read at the memory operation + aligned srcp to make it aligned for copy. */ + + align = srcp % OPSIZ; + sh_1 = 8 * (srcp % OPSIZ); + sh_2 = 8 * OPSIZ - sh_1; + + /* Make SRCP aligned by rounding it down to the beginning of the `op_t' + it points in the middle of. */ + srcp &= -OPSIZ; + a0 = ((op_t *) srcp)[0]; + + if (len & 1) + { + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2); + + if (len == 1) + return; + + a0 = a1; + srcp += OPSIZ; + dstp += OPSIZ; + len -= 1; + } + + fwd_align_merge (align); + +} + +/* _wordcopy_bwd_aligned -- Copy block finishing right before + SRCP to block finishing right before DSTP with LEN `op_t' words + (not LEN bytes!). Both SRCP and DSTP should be aligned for memory + operations on `op_t's. */ + +#ifndef WORDCOPY_BWD_ALIGNED +# define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned +#endif + +void +WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + if (len & 1) + { + srcp -= OPSIZ; + dstp -= OPSIZ; + ((op_t *) dstp)[0] = ((op_t *) srcp)[0]; + + if (len == 1) + return; + len -= 1; + } + + do + { + srcp -= 2 * OPSIZ; + dstp -= 2 * OPSIZ; + + a1 = ((op_t *) srcp)[1]; + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[1] = a1; + ((op_t *) dstp)[0] = a0; + + len -= 2; + } + while (len != 0); +} + +#define bwd_align_merge(align) \ + do \ + { \ + srcp -= 2 * OPSIZ; \ + dstp -= 2 * OPSIZ; \ + a1 = ((op_t *) srcp)[1]; \ + a0 = ((op_t *) srcp)[0]; \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (__WORDSIZE-align*8)); \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (__WORDSIZE-align*8)); \ + a2 = a0; \ + len -= 2; \ + } \ + while (len != 0) + +/* _wordcopy_bwd_dest_aligned -- Copy block finishing right + before SRCP to block finishing right before DSTP with LEN `op_t' + words (not LEN bytes!). DSTP should be aligned for memory + operations on `op_t', but SRCP must *not* be aligned. */ + +#ifndef WORDCOPY_BWD_DEST_ALIGNED +# define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned +#endif + +void +WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1, a2; + int sh_1, sh_2; + int align; + + /* Calculate how to shift a word read at the memory operation + aligned srcp to make it aligned for copy. */ + + align = srcp % OPSIZ; + sh_1 = 8 * (srcp % OPSIZ); + sh_2 = 8 * OPSIZ - sh_1; + + /* Make srcp aligned by rounding it down to the beginning of the op_t + it points in the middle of. */ + srcp &= -OPSIZ; + a2 = ((op_t *) srcp)[0]; + + if (len & 1) + { + srcp -= OPSIZ; + dstp -= OPSIZ; + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2); + + if (len == 1) + return; + + a2 = a1; + len -= 1; + } + + bwd_align_merge (align); +} diff --git a/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logb.c b/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logb.c new file mode 100644 index 0000000000..af74b1d024 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logb.c @@ -0,0 +1,79 @@ +/* logb(). PowerPC/POWER7 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math_ldbl_opt.h> + +/* This implementation avoids FP to INT conversions by using VSX + bitwise instructions over FP values. */ + +static const double two1div52 = 2.220446049250313e-16; /* 1/2**52 */ +static const double two10m1 = -1023.0; /* 2**10 -1 */ + +/* FP mask to extract the exponent. */ +static const union { + unsigned long long mask; + double d; +} mask = { 0x7ff0000000000000ULL }; + +double +__logb (double x) +{ + double ret; + + if (__builtin_expect (x == 0.0, 0)) + /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF]. */ + return -1.0 / __builtin_fabs (x); + + /* ret = x & 0x7ff0000000000000; */ + asm ( + "xxland %x0,%x1,%x2\n" + "fcfid %0,%0" + : "=f" (ret) + : "f" (x), "f" (mask.d)); + /* ret = (ret >> 52) - 1023.0; */ + ret = (ret * two1div52) + two10m1; + if (__builtin_expect (ret > -two10m1, 0)) + /* Multiplication is used to set logb (+-INF) = INF. */ + return (x * x); + else if (__builtin_expect (ret == two10m1, 0)) + { + /* POSIX specifies that denormal numbers are treated as + though they were normalized. */ + int32_t lx, ix; + int ma; + + EXTRACT_WORDS (ix, lx, x); + ix &= 0x7fffffff; + if (ix == 0) + ma = __builtin_clz (lx) + 32; + else + ma = __builtin_clz (ix); + return (double) (-1023 - (ma - 12)); + } + /* Test to avoid logb_downward (0.0) == -0.0. */ + return ret == -0.0 ? 0.0 : ret; +} +weak_alias (__logb, logb) +#ifdef NO_LONG_DOUBLE +strong_alias (__logb, __logbl) +weak_alias (__logb, logbl) +#endif + +#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, logb, logbl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logbf.c b/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logbf.c new file mode 100644 index 0000000000..1461327cd1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logbf.c @@ -0,0 +1,60 @@ +/* logbf(). PowerPC/POWER7 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "math_private.h" + +/* This implementation avoids FP to INT conversions by using VSX + bitwise instructions over FP values. */ + +static const double two1div52 = 2.220446049250313e-16; /* 1/2**52 */ +static const double two10m1 = -1023.0; /* -2**10 + 1 */ +static const double two7m1 = -127.0; /* -2**7 + 1 */ + +/* FP mask to extract the exponent. */ +static const union { + unsigned long long mask; + double d; +} mask = { 0x7ff0000000000000ULL }; + +float +__logbf (float x) +{ + /* VSX operation are all done internally as double. */ + double ret; + + if (__builtin_expect (x == 0.0, 0)) + /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF]. */ + return -1.0 / __builtin_fabsf (x); + + /* ret = x & 0x7f800000; */ + asm ( + "xxland %x0,%x1,%x2\n" + "fcfid %0,%0" + : "=f"(ret) + : "f" (x), "f" (mask.d)); + /* ret = (ret >> 52) - 1023.0, since ret is double. */ + ret = (ret * two1div52) + two10m1; + if (__builtin_expect (ret > -two7m1, 0)) + /* Multiplication is used to set logb (+-INF) = INF. */ + return (x * x); + /* Since operations are done with double we don't need + additional tests for subnormal numbers. + The test is to avoid logb_downward (0.0) == -0.0. */ + return ret == -0.0 ? 0.0 : ret; +} +weak_alias (__logbf, logbf) diff --git a/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logbl.c b/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logbl.c new file mode 100644 index 0000000000..3ae383a831 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logbl.c @@ -0,0 +1,83 @@ +/* logbl(). PowerPC/POWER7 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <math_ldbl_opt.h> + +/* This implementation avoids FP to INT conversions by using VSX + bitwise instructions over FP values. */ + +static const double two1div52 = 2.220446049250313e-16; /* 1/2**52 */ +static const double two10m1 = -1023.0; /* 2**10 -1 */ + +/* FP mask to extract the exponent. */ +static const union { + unsigned long long mask; + double d; +} mask = { 0x7ff0000000000000ULL }; + +long double +__logbl (long double x) +{ + double xh, xl; + double ret; + int64_t hx; + + if (__builtin_expect (x == 0.0L, 0)) + /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF]. */ + return -1.0L / __builtin_fabsl (x); + + ldbl_unpack (x, &xh, &xl); + EXTRACT_WORDS64 (hx, xh); + /* ret = x & 0x7ff0000000000000; */ + asm ( + "xxland %x0,%x1,%x2\n" + "fcfid %0,%0" + : "=f" (ret) + : "f" (xh), "f" (mask.d)); + /* ret = (ret >> 52) - 1023.0; */ + ret = (ret * two1div52) + two10m1; + if (__builtin_expect (ret > -two10m1, 0)) + /* Multiplication is used to set logb (+-INF) = INF. */ + return (xh * xh); + else if (__builtin_expect (ret == two10m1, 0)) + { + /* POSIX specifies that denormal number is treated as + though it were normalized. */ + return (long double) (- (__builtin_clzll (hx & 0x7fffffffffffffffLL) \ + - 12) - 1023); + } + else if ((hx & 0x000fffffffffffffLL) == 0) + { + /* If the high part is a power of 2, and the low part is nonzero + with the opposite sign, the low part affects the + exponent. */ + int64_t lx, rhx; + EXTRACT_WORDS64 (lx, xl); + rhx = (hx & 0x7ff0000000000000LL) >> 52; + if ((hx ^ lx) < 0 && (lx & 0x7fffffffffffffffLL) != 0) + rhx--; + return (long double) (rhx - 1023); + } + /* Test to avoid logb_downward (0.0) == -0.0. */ + return ret == -0.0 ? 0.0 : ret; +} +#ifndef __logbl +long_double_symbol (libm, __logbl, logbl); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/memcmp.S new file mode 100644 index 0000000000..d1865140eb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/memcmp.S @@ -0,0 +1,128 @@ +/* Optimized memcmp implementation for PowerPC476. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* memcmp + + r3:source1 address, return equality + r4:source2 address + r5:byte count + + Check 2 words from src1 and src2. If unequal jump to end and + return src1 > src2 or src1 < src2. + If count = zero check bytes before zero counter and then jump to end and + return src1 > src2, src1 < src2 or src1 = src2. + If src1 = src2 and no null, repeat. */ + +EALIGN (memcmp, 5, 0) + srwi. r6,r5,5 + beq L(preword2_count_loop) + mtctr r6 + clrlwi r5,r5,27 + +L(word8_compare_loop): + lwz r10,0(r3) + lwz r6,4(r3) + lwz r8,0(r4) + lwz r9,4(r4) + cmplw cr5,r8,r10 + cmplw cr1,r9,r6 + bne cr5,L(st2) + bne cr1,L(st1) + lwz r10,8(r3) + lwz r6,12(r3) + lwz r8,8(r4) + lwz r9,12(r4) + cmplw cr5,r8,r10 + cmplw cr1,r9,r6 + bne cr5,L(st2) + bne cr1,L(st1) + lwz r10,16(r3) + lwz r6,20(r3) + lwz r8,16(r4) + lwz r9,20(r4) + cmplw cr5,r8,r10 + cmplw cr1,r9,r6 + bne cr5,L(st2) + bne cr1,L(st1) + lwz r10,24(r3) + lwz r6,28(r3) + addi r3,r3,0x20 + lwz r8,24(r4) + lwz r9,28(r4) + addi r4,r4,0x20 + cmplw cr5,r8,r10 + cmplw cr1,r9,r6 + bne cr5,L(st2) + bne cr1,L(st1) + bdnz L(word8_compare_loop) + +L(preword2_count_loop): + srwi. r6,r5,3 + beq L(prebyte_count_loop) + mtctr r6 + clrlwi r5,r5,29 + +L(word2_count_loop): + lwz r10,0(r3) + lwz r6,4(r3) + addi r3,r3,0x08 + lwz r8,0(r4) + lwz r9,4(r4) + addi r4,r4,0x08 + cmplw cr5,r8,r10 + cmplw cr1,r9,r6 + bne cr5,L(st2) + bne cr1,L(st1) + bdnz L(word2_count_loop) + +L(prebyte_count_loop): + addi r5,r5,1 + mtctr r5 + bdz L(end_memcmp) + +L(byte_count_loop): + lbz r6,0(r3) + addi r3,r3,0x01 + lbz r8,0(r4) + addi r4,r4,0x01 + cmplw cr5,r8,r6 + bne cr5,L(st2) + bdnz L(byte_count_loop) + +L(end_memcmp): + addi r3,r0,0 + blr + +L(l_r): + addi r3,r0,1 + blr + +L(st1): + blt cr1,L(l_r) + addi r3,r0,-1 + blr + +L(st2): + blt cr5,L(l_r) + addi r3,r0,-1 + blr +END (memcmp) +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp,bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/memcpy.S new file mode 100644 index 0000000000..9878dbceac --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/memcpy.S @@ -0,0 +1,130 @@ +/* Optimized memcpy implementation for PowerPC476. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* memcpy + + r0:return address + r3:destination address + r4:source address + r5:byte count + + Save return address in r0. + If destinationn and source are unaligned and copy count is greater than 256 + then copy 0-3 bytes to make destination aligned. + If 32 or more bytes to copy we use 32 byte copy loop. + Finaly we copy 0-31 extra bytes. */ + +EALIGN (memcpy, 5, 0) +/* Check if bytes to copy are greater than 256 and if + source and destination are unaligned */ + cmpwi r5,0x0100 + addi r0,r3,0 + ble L(string_count_loop) + neg r6,r3 + clrlwi. r6,r6,30 + beq L(string_count_loop) + neg r6,r4 + clrlwi. r6,r6,30 + beq L(string_count_loop) + mtctr r6 + subf r5,r6,r5 + +L(unaligned_bytecopy_loop): /* Align destination by coping 0-3 bytes */ + lbz r8,0x0(r4) + addi r4,r4,1 + stb r8,0x0(r3) + addi r3,r3,1 + bdnz L(unaligned_bytecopy_loop) + srwi. r7,r5,5 + beq L(preword2_count_loop) + mtctr r7 + +L(word8_count_loop_no_dcbt): /* Copy 32 bytes at a time */ + lwz r6,0(r4) + lwz r7,4(r4) + lwz r8,8(r4) + lwz r9,12(r4) + subi r5,r5,0x20 + stw r6,0(r3) + stw r7,4(r3) + stw r8,8(r3) + stw r9,12(r3) + lwz r6,16(r4) + lwz r7,20(r4) + lwz r8,24(r4) + lwz r9,28(r4) + addi r4,r4,0x20 + stw r6,16(r3) + stw r7,20(r3) + stw r8,24(r3) + stw r9,28(r3) + addi r3,r3,0x20 + bdnz L(word8_count_loop_no_dcbt) + +L(preword2_count_loop): /* Copy remaining 0-31 bytes */ + clrlwi. r12,r5,27 + beq L(end_memcpy) + mtxer r12 + lswx r5,0,r4 + stswx r5,0,r3 + mr r3,r0 + blr + +L(string_count_loop): /* Copy odd 0-31 bytes */ + clrlwi. r12,r5,28 + add r3,r3,r5 + add r4,r4,r5 + beq L(pre_string_copy) + mtxer r12 + subf r4,r12,r4 + subf r3,r12,r3 + lswx r6,0,r4 + stswx r6,0,r3 + +L(pre_string_copy): /* Check how many 32 byte chunks to copy */ + srwi. r7,r5,4 + beq L(end_memcpy) + mtctr r7 + +L(word4_count_loop_no_dcbt): /* Copy 32 bytes at a time */ + lwz r6,-4(r4) + lwz r7,-8(r4) + lwz r8,-12(r4) + lwzu r9,-16(r4) + stw r6,-4(r3) + stw r7,-8(r3) + stw r8,-12(r3) + stwu r9,-16(r3) + bdz L(end_memcpy) + lwz r6,-4(r4) + lwz r7,-8(r4) + lwz r8,-12(r4) + lwzu r9,-16(r4) + stw r6,-4(r3) + stw r7,-8(r3) + stw r8,-12(r3) + stwu r9,-16(r3) + bdnz L(word4_count_loop_no_dcbt) + +L(end_memcpy): + mr r3,r0 + blr +END (memcpy) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/memset.S new file mode 100644 index 0000000000..18aea515ba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/memset.S @@ -0,0 +1,152 @@ +/* Optimized memset for PowerPC405,440,464 (32-byte cacheline). + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* memset + + r3:destination address and return address + r4:source integer to copy + r5:byte count + r11:sources integer to copy in all 32 bits of reg + r12:temp return address + + Save return address in r12 + If destinationn is unaligned and count is greater tha 255 bytes + set 0-3 bytes to make destination aligned + If count is greater tha 255 bytes and setting zero to memory + use dbcz to set memeory when we can + otherwsie do the follwoing + If 16 or more words to set we use 16 word copy loop. + Finaly we set 0-15 extra bytes with string store. */ + +EALIGN (memset, 5, 0) + rlwinm r11,r4,0,24,31 + rlwimi r11,r4,8,16,23 + rlwimi r11,r11,16,0,15 + addi r12,r3,0 + cmpwi r5,0x00FF + ble L(preword8_count_loop) + cmpwi r4,0x00 + beq L(use_dcbz) + neg r6,r3 + clrlwi. r6,r6,30 + beq L(preword8_count_loop) + addi r8,0,1 + mtctr r6 + subi r3,r3,1 + +L(unaligned_bytecopy_loop): + stbu r11,0x1(r3) + subf. r5,r8,r5 + beq L(end_memset) + bdnz L(unaligned_bytecopy_loop) + addi r3,r3,1 + +L(preword8_count_loop): + srwi. r6,r5,4 + beq L(preword2_count_loop) + mtctr r6 + addi r3,r3,-4 + mr r8,r11 + mr r9,r11 + mr r10,r11 + +L(word8_count_loop_no_dcbt): + stwu r8,4(r3) + stwu r9,4(r3) + subi r5,r5,0x10 + stwu r10,4(r3) + stwu r11,4(r3) + bdnz L(word8_count_loop_no_dcbt) + addi r3,r3,4 + +L(preword2_count_loop): + clrlwi. r7,r5,28 + beq L(end_memset) + mr r8,r11 + mr r9,r11 + mr r10,r11 + mtxer r7 + stswx r8,0,r3 + +L(end_memset): + addi r3,r12,0 + blr + +L(use_dcbz): + neg r6,r3 + clrlwi. r7,r6,28 + beq L(skip_string_loop) + mr r8,r11 + mr r9,r11 + mr r10,r11 + subf r5,r7,r5 + mtxer r7 + stswx r8,0,r3 + add r3,r3,r7 + +L(skip_string_loop): + clrlwi r8,r6,27 + srwi. r8,r8,4 + beq L(dcbz_pre_loop) + mtctr r8 + +L(word_loop): + stw r11,0(r3) + subi r5,r5,0x10 + stw r11,4(r3) + stw r11,8(r3) + stw r11,12(r3) + addi r3,r3,0x10 + bdnz L(word_loop) + +L(dcbz_pre_loop): + srwi r6,r5,5 + mtctr r6 + addi r7,0,0 + +L(dcbz_loop): + dcbz r3,r7 + addi r3,r3,0x20 + subi r5,r5,0x20 + bdnz L(dcbz_loop) + srwi. r6,r5,4 + beq L(postword2_count_loop) + mtctr r6 + +L(postword8_count_loop): + stw r11,0(r3) + subi r5,r5,0x10 + stw r11,4(r3) + stw r11,8(r3) + stw r11,12(r3) + addi r3,r3,0x10 + bdnz L(postword8_count_loop) + +L(postword2_count_loop): + clrlwi. r7,r5,28 + beq L(end_memset) + mr r8,r11 + mr r9,r11 + mr r10,r11 + mtxer r7 + stswx r8,0,r3 + b L(end_memset) +END (memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strcmp.S new file mode 100644 index 0000000000..42e04e9552 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strcmp.S @@ -0,0 +1,134 @@ +/* Optimized strcmp implementation for PowerPC476. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* strcmp + + Register Use + r0:temp return equality + r3:source1 address, return equality + r4:source2 address + + Implementation description + Check 2 words from src1 and src2. If unequal jump to end and + return src1 > src2 or src1 < src2. + If null check bytes before null and then jump to end and + return src1 > src2, src1 < src2 or src1 = src2. + If src1 = src2 and no null, repeat. */ + +EALIGN (strcmp,5,0) + neg r7,r3 + clrlwi r7,r7,20 + neg r8,r4 + clrlwi r8,r8,20 + srwi. r7,r7,5 + beq L(byte_loop) + srwi. r8,r8,5 + beq L(byte_loop) + cmplw r7,r8 + mtctr r7 + ble L(big_loop) + mtctr r8 + +L(big_loop): + lwz r5,0(r3) + lwz r6,4(r3) + lwz r8,0(r4) + lwz r9,4(r4) + dlmzb. r12,r5,r6 + bne L(end_check) + cmplw r5,r8 + bne L(st1) + cmplw r6,r9 + bne L(st1) + lwz r5,8(r3) + lwz r6,12(r3) + lwz r8,8(r4) + lwz r9,12(r4) + dlmzb. r12,r5,r6 + bne L(end_check) + cmplw r5,r8 + bne L(st1) + cmplw r6,r9 + bne L(st1) + lwz r5,16(r3) + lwz r6,20(r3) + lwz r8,16(r4) + lwz r9,20(r4) + dlmzb. r12,r5,r6 + bne L(end_check) + cmplw r5,r8 + bne L(st1) + cmplw r6,r9 + bne L(st1) + lwz r5,24(r3) + lwz r6,28(r3) + addi r3,r3,0x20 + lwz r8,24(r4) + lwz r9,28(r4) + addi r4,r4,0x20 + dlmzb. r12,r5,r6 + bne L(end_check) + cmplw r5,r8 + bne L(st1) + cmplw r6,r9 + bne L(st1) + bdnz L(big_loop) + b L(byte_loop) + +L(end_check): + subfic r12,r12,4 + blt L(end_check2) + rlwinm r12,r12,3,0,31 + srw r5,r5,r12 + srw r8,r8,r12 + cmplw r5,r8 + bne L(st1) + b L(end_strcmp) + +L(end_check2): + addi r12,r12,4 + cmplw r5,r8 + rlwinm r12,r12,3,0,31 + bne L(st1) + srw r6,r6,r12 + srw r9,r9,r12 + cmplw r6,r9 + bne L(st1) + +L(end_strcmp): + addi r3,r0,0 + blr + +L(st1): + mfcr r3 + blr + +L(byte_loop): + lbz r5,0(r3) + addi r3,r3,1 + lbz r6,0(r4) + addi r4,r4,1 + cmplw r5,r6 + bne L(st1) + cmpwi r5,0 + beq L(end_strcmp) + b L(byte_loop) +END (strcmp) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/strcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strcpy.S new file mode 100644 index 0000000000..2a554dc32e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strcpy.S @@ -0,0 +1,107 @@ +/* Optimized strcpy implementation for PowerPC476. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* strcpy + + Register Use + r3:destination and return address + r4:source address + r10:temp destination address + + Implementation description + Loop by checking 2 words at a time, with dlmzb. Check if there is a null + in the 2 words. If there is a null jump to end checking to determine + where in the last 8 bytes it is. Copy the appropriate bytes of the last + 8 according to the null position. */ + +EALIGN (strcpy, 5, 0) + neg r7,r4 + subi r4,r4,1 + clrlwi. r8,r7,29 + subi r10,r3,1 + beq L(pre_word8_loop) + mtctr r8 + +L(loop): + lbzu r5,0x01(r4) + cmpi cr5,r5,0x0 + stbu r5,0x01(r10) + beq cr5,L(end_strcpy) + bdnz L(loop) + +L(pre_word8_loop): + subi r4,r4,3 + subi r10,r10,3 + +L(word8_loop): + lwzu r5,0x04(r4) + lwzu r6,0x04(r4) + dlmzb. r11,r5,r6 + bne L(byte_copy) + stwu r5,0x04(r10) + stwu r6,0x04(r10) + lwzu r5,0x04(r4) + lwzu r6,0x04(r4) + dlmzb. r11,r5,r6 + bne L(byte_copy) + stwu r5,0x04(r10) + stwu r6,0x04(r10) + lwzu r5,0x04(r4) + lwzu r6,0x04(r4) + dlmzb. r11,r5,r6 + bne L(byte_copy) + stwu r5,0x04(r10) + stwu r6,0x04(r10) + lwzu r5,0x04(r4) + lwzu r6,0x04(r4) + dlmzb. r11,r5,r6 + bne L(byte_copy) + stwu r5,0x04(r10) + stwu r6,0x04(r10) + b L(word8_loop) + +L(last_bytes_copy): + stwu r5,0x04(r10) + subi r11,r11,4 + mtctr r11 + addi r10,r10,3 + subi r4,r4,1 + +L(last_bytes_copy_loop): + lbzu r5,0x01(r4) + stbu r5,0x01(r10) + bdnz L(last_bytes_copy_loop) + blr + +L(byte_copy): + blt L(last_bytes_copy) + mtctr r11 + addi r10,r10,3 + subi r4,r4,5 + +L(last_bytes_copy_loop2): + lbzu r5,0x01(r4) + stbu r5,0x01(r10) + bdnz L(last_bytes_copy_loop2) + +L(end_strcpy): + blr +END (strcpy) +libc_hidden_builtin_def (strcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strlen.S new file mode 100644 index 0000000000..25e54dafc7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strlen.S @@ -0,0 +1,75 @@ +/* Optimized strlen implementation for PowerPC476. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* strlen + + Register Use + r3:source address and return length of string + r4:byte counter + + Implementation description + Load 2 words at a time and count bytes, if we find null we subtract one from + the count and return the count value. We need to subtract one because + we don't count the null character as a byte. */ + +EALIGN (strlen,5,0) + neg r7,r3 + clrlwi. r8,r7,29 + addi r4,0,0 + beq L(byte_count_loop) + mtctr r8 + +L(loop): + lbz r5,0(r3) + cmpi cr5,r5,0x0 + addi r3,r3,0x1 + addi r4,r4,0x1 + beq cr5,L(end_strlen) + bdnz L(loop) + +L(byte_count_loop): + lwz r5,0(r3) + lwz r6,4(r3) + dlmzb. r12,r5,r6 + add r4,r4,r12 + bne L(end_strlen) + lwz r5,8(r3) + lwz r6,12(r3) + dlmzb. r12,r5,r6 + add r4,r4,r12 + bne L(end_strlen) + lwz r5,16(r3) + lwz r6,20(r3) + dlmzb. r12,r5,r6 + add r4,r4,r12 + bne L(end_strlen) + lwz r5,24(r3) + lwz r6,28(r3) + addi r3,r3,0x20 + dlmzb. r12,r5,r6 + add r4,r4,r12 + bne L(end_strlen) + b L(byte_count_loop) + +L(end_strlen): + addi r3,r4,-1 + blr +END (strlen) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strncmp.S new file mode 100644 index 0000000000..9c5a8feb9d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strncmp.S @@ -0,0 +1,128 @@ +/* Optimized strncmp implementation for PowerPC476. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* strncmp + + Register Use + r0:temp return equality + r3:source1 address, return equality + r4:source2 address + r5:byte count + + Implementation description + Touch in 3 lines of D-cache. + If source1 or source2 is unaligned copy 0-3 bytes to make source1 aligned + Check 2 words from src1 and src2. If unequal jump to end and + return src1 > src2 or src1 < src2. + If null check bytes before null and then jump to end and + return src1 > src2, src1 < src2 or src1 = src2. + If count = zero check bytes before zero counter and then jump to end and + return src1 > src2, src1 < src2 or src1 = src2. + If src1 = src2 and no null, repeat. */ + +EALIGN (strncmp,5,0) + neg r7,r3 + clrlwi r7,r7,20 + neg r8,r4 + clrlwi r8,r8,20 + srwi. r7,r7,3 + beq L(prebyte_count_loop) + srwi. r8,r8,3 + beq L(prebyte_count_loop) + cmplw r7,r8 + mtctr r7 + ble L(preword2_count_loop) + mtctr r8 + +L(preword2_count_loop): + srwi. r6,r5,3 + beq L(prebyte_count_loop) + mfctr r7 + cmplw r6,r7 + bgt L(set_count_loop) + mtctr r6 + clrlwi r5,r5,29 + +L(word2_count_loop): + lwz r10,0(r3) + lwz r6,4(r3) + addi r3,r3,0x08 + lwz r8,0(r4) + lwz r9,4(r4) + addi r4,r4,0x08 + dlmzb. r12,r10,r6 + bne L(end_check) + cmplw r10,r8 + bne L(st1) + cmplw r6,r9 + bne L(st1) + bdnz L(word2_count_loop) + +L(prebyte_count_loop): + addi r5,r5,1 + mtctr r5 + bdz L(end_strncmp) + +L(byte_count_loop): + lbz r6,0(r3) + addi r3,r3,1 + lbz r7,0(r4) + addi r4,r4,1 + cmplw r6,r7 + bne L(st1) + cmpwi r6,0 + beq L(end_strncmp) + bdnz L(byte_count_loop) + b L(end_strncmp) + +L(set_count_loop): + slwi r7,r7,3 + subf r5,r7,r5 + b L(word2_count_loop) + +L(end_check): + subfic r12,r12,4 + blt L(end_check2) + rlwinm r12,r12,3,0,31 + srw r10,r10,r12 + srw r8,r8,r12 + cmplw r10,r8 + bne L(st1) + b L(end_strncmp) + +L(end_check2): + addi r12,r12,4 + cmplw r10,r8 + rlwinm r12,r12,3,0,31 + bne L(st1) + srw r6,r6,r12 + srw r9,r9,r12 + cmplw r6,r9 + bne L(st1) + +L(end_strncmp): + addi r3,r0,0 + blr + +L(st1): + mfcr r3 + blr +END (strncmp) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/440/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/440/Implies new file mode 100644 index 0000000000..70c0d2eda3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/440/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/405/fpu +powerpc/powerpc32/405 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/464/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/464/Implies new file mode 100644 index 0000000000..c3e52c5504 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/464/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/440/fpu +powerpc/powerpc32/440 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/476/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/476/Implies new file mode 100644 index 0000000000..2829f9ccaf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/476/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/464/fpu +powerpc/powerpc32/464 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/476/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/476/memset.S new file mode 100644 index 0000000000..17c91238e1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/476/memset.S @@ -0,0 +1,152 @@ +/* Optimized memset for PowerPC476 (128-byte cacheline). + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* memset + + r3:destination address and return address + r4:source integer to copy + r5:byte count + r11:sources integer to copy in all 32 bits of reg + r12:temp return address + + Save return address in r12 + If destinationn is unaligned and count is greater tha 255 bytes + set 0-3 bytes to make destination aligned + If count is greater tha 255 bytes and setting zero to memory + use dbcz to set memeory when we can + otherwsie do the follwoing + If 16 or more words to set we use 16 word copy loop. + Finaly we set 0-15 extra bytes with string store. */ + +EALIGN (memset, 5, 0) + rlwinm r11,r4,0,24,31 + rlwimi r11,r4,8,16,23 + rlwimi r11,r11,16,0,15 + addi r12,r3,0 + cmpwi r5,0x00FF + ble L(preword8_count_loop) + cmpwi r4,0x00 + beq L(use_dcbz) + neg r6,r3 + clrlwi. r6,r6,30 + beq L(preword8_count_loop) + addi r8,0,1 + mtctr r6 + subi r3,r3,1 + +L(unaligned_bytecopy_loop): + stbu r11,0x1(r3) + subf. r5,r8,r5 + beq L(end_memset) + bdnz L(unaligned_bytecopy_loop) + addi r3,r3,1 + +L(preword8_count_loop): + srwi. r6,r5,4 + beq L(preword2_count_loop) + mtctr r6 + addi r3,r3,-4 + mr r8,r11 + mr r9,r11 + mr r10,r11 + +L(word8_count_loop_no_dcbt): + stwu r8,4(r3) + stwu r9,4(r3) + subi r5,r5,0x10 + stwu r10,4(r3) + stwu r11,4(r3) + bdnz L(word8_count_loop_no_dcbt) + addi r3,r3,4 + +L(preword2_count_loop): + clrlwi. r7,r5,28 + beq L(end_memset) + mr r8,r11 + mr r9,r11 + mr r10,r11 + mtxer r7 + stswx r8,0,r3 + +L(end_memset): + addi r3,r12,0 + blr + +L(use_dcbz): + neg r6,r3 + clrlwi. r7,r6,28 + beq L(skip_string_loop) + mr r8,r11 + mr r9,r11 + mr r10,r11 + subf r5,r7,r5 + mtxer r7 + stswx r8,0,r3 + add r3,r3,r7 + +L(skip_string_loop): + clrlwi r8,r6,25 + srwi. r8,r8,4 + beq L(dcbz_pre_loop) + mtctr r8 + +L(word_loop): + stw r11,0(r3) + subi r5,r5,0x10 + stw r11,4(r3) + stw r11,8(r3) + stw r11,12(r3) + addi r3,r3,0x10 + bdnz L(word_loop) + +L(dcbz_pre_loop): + srwi r6,r5,7 + mtctr r6 + addi r7,0,0 + +L(dcbz_loop): + dcbz r3,r7 + addi r3,r3,0x80 + subi r5,r5,0x80 + bdnz L(dcbz_loop) + srwi. r6,r5,4 + beq L(postword2_count_loop) + mtctr r6 + +L(postword8_count_loop): + stw r11,0(r3) + subi r5,r5,0x10 + stw r11,4(r3) + stw r11,8(r3) + stw r11,12(r3) + addi r3,r3,0x10 + bdnz L(postword8_count_loop) + +L(postword2_count_loop): + clrlwi. r7,r5,28 + beq L(end_memset) + mr r8,r11 + mr r9,r11 + mr r10,r11 + mtxer r7 + stswx r8,0,r3 + b L(end_memset) +END (memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/970/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/970/Implies new file mode 100644 index 0000000000..17139bf21c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/970/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power4/fpu +powerpc/powerpc32/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/Implies new file mode 100644 index 0000000000..39a34c5f57 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/Implies @@ -0,0 +1 @@ +wordsize-32 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/Makefile new file mode 100644 index 0000000000..cf620c8269 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/Makefile @@ -0,0 +1,49 @@ +# Powerpc32 specific build options. + +# Some Powerpc32 variants assume soft-fp is the default even though there is +# an fp variant so provide -mhard-float if --with-fp is explicitly passed. + +ifeq ($(with-fp),yes) ++cflags += -mhard-float +ASFLAGS += -mhard-float +sysdep-LDFLAGS += -mhard-float +endif + +ifeq ($(subdir),gmon) +sysdep_routines += ppc-mcount compat-ppc-mcount +static-only-routines += ppc-mcount +shared-only-routines += compat-ppc-mcount +endif + +ifeq ($(subdir),misc) +sysdep_routines += gprsave0 gprrest0 gprsave1 gprrest1 +endif + +# On PPC, -fpic works until the GOT contains 32768 bytes, and possibly +# more depending on how clever the linker is. Each GOT entry takes 4 bytes, +# so that's at least 8192 entries. Since libc only uses about 2000 entries, +# we want to use -fpic, because this generates fewer relocs. +ifeq (yes,$(build-shared)) +pic-ccflag = -fpic +endif + +ifeq ($(subdir),csu) +# There is no benefit to using sdata for these objects, and the user +# of the library should be able to control what goes into sdata. +CFLAGS-init.o = -G0 +CFLAGS-gmon-start.o = -G0 + +ifeq (yes,$(build-shared)) +# Compatibility +ifeq (yes,$(have-protected)) +CPPFLAGS-libgcc-compat.S = -DHAVE_DOT_HIDDEN +endif +sysdep_routines += libgcc-compat +shared-only-routines += libgcc-compat +endif +endif + +ifeq ($(subdir),elf) +# extra shared linker files to link only into dl-allobjs.so +sysdep-rtld-routines += dl-start +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/Versions b/REORG.TODO/sysdeps/powerpc/powerpc32/Versions new file mode 100644 index 0000000000..b0782fecd4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/Versions @@ -0,0 +1,40 @@ +libc { + GLIBC_2.0 { + # Functions from libgcc. + __divdi3; __moddi3; __udivdi3; __umoddi3; + __cmpdi2; __ucmpdi2; + __ashldi3; __ashrdi3; __lshrdi3; + __fixdfdi; __fixunsdfdi; + __fixsfdi; __fixunssfdi; + __floatdidf; __floatdisf; + } + GLIBC_2.16 { + __mcount_internal; + } + GLIBC_PRIVATE { + __mcount_internal; + } +} + +libm { + GLIBC_2.2 { + # Special functions to save and restore registers used by the + # runtime libraries. + _restgpr0_13; _restgpr0_14; _restgpr0_15; _restgpr0_16; _restgpr0_17; + _restgpr0_18; _restgpr0_19; _restgpr0_20; _restgpr0_21; _restgpr0_22; + _restgpr0_22; _restgpr0_23; _restgpr0_24; _restgpr0_25; _restgpr0_26; + _restgpr0_27; _restgpr0_28; _restgpr0_29; _restgpr0_30; _restgpr0_31; + _savegpr0_13; _savegpr0_14; _savegpr0_15; _savegpr0_16; _savegpr0_17; + _savegpr0_18; _savegpr0_19; _savegpr0_20; _savegpr0_21; _savegpr0_22; + _savegpr0_22; _savegpr0_23; _savegpr0_24; _savegpr0_25; _savegpr0_26; + _savegpr0_27; _savegpr0_28; _savegpr0_29; _savegpr0_30; _savegpr0_31; + _restgpr1_13; _restgpr1_14; _restgpr1_15; _restgpr1_16; _restgpr1_17; + _restgpr1_18; _restgpr1_19; _restgpr1_20; _restgpr1_21; _restgpr1_22; + _restgpr1_22; _restgpr1_23; _restgpr1_24; _restgpr1_25; _restgpr1_26; + _restgpr1_27; _restgpr1_28; _restgpr1_29; _restgpr1_30; _restgpr1_31; + _savegpr1_13; _savegpr1_14; _savegpr1_15; _savegpr1_16; _savegpr1_17; + _savegpr1_18; _savegpr1_19; _savegpr1_20; _savegpr1_21; _savegpr1_22; + _savegpr1_22; _savegpr1_23; _savegpr1_24; _savegpr1_25; _savegpr1_26; + _savegpr1_27; _savegpr1_28; _savegpr1_29; _savegpr1_30; _savegpr1_31; + } +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/__longjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc32/__longjmp-common.S new file mode 100644 index 0000000000..4b60a2f9a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/__longjmp-common.S @@ -0,0 +1,82 @@ +/* longjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#ifdef __NO_VMX__ +# include <novmxsetjmp.h> +#else +# include <jmpbuf-offsets.h> +#endif + +#if defined __SPE__ || (defined __NO_FPRS__ && !defined _SOFT_FLOAT) +# define LOAD_GP(N) evldd r##N,((JB_FPRS+((N)-14)*2)*4)(r3) +#else +# define LOAD_GP(N) lwz r##N,((JB_GPRS+(N)-14)*4)(r3) +#endif + +ENTRY (__longjmp_symbol) + +#if defined PTR_DEMANGLE || defined CHECK_SP + lwz r24,(JB_GPR1*4)(r3) +# ifdef CHECK_SP +# ifdef PTR_DEMANGLE + PTR_DEMANGLE3 (r24, r24, r25) +# endif + CHECK_SP (r24) + mr r1,r24 +# endif +#else + lwz r1,(JB_GPR1*4)(r3) +#endif + lwz r0,(JB_LR*4)(r3) + LOAD_GP (14) + LOAD_GP (15) + LOAD_GP (16) + LOAD_GP (17) + LOAD_GP (18) + LOAD_GP (19) + LOAD_GP (20) +#ifdef PTR_DEMANGLE +# ifndef CHECK_SP + PTR_DEMANGLE3 (r1, r24, r25) +# endif + PTR_DEMANGLE2 (r0, r25) +#endif + /* longjmp/longjmp_target probe expects longjmp first argument (4@3), + second argument (-4@4), and target address (4@0), respectively. */ + LIBC_PROBE (longjmp, 3, 4@3, -4@4, 4@0) + mtlr r0 + LOAD_GP (21) + LOAD_GP (22) + lwz r5,(JB_CR*4)(r3) + LOAD_GP (23) + LOAD_GP (24) + LOAD_GP (25) + mtcrf 0xFF,r5 + LOAD_GP (26) + LOAD_GP (27) + LOAD_GP (28) + LOAD_GP (29) + LOAD_GP (30) + LOAD_GP (31) + LIBC_PROBE (longjmp_target, 3, 4@3, -4@4, 4@0) + mr r3,r4 + blr +END (__longjmp_symbol) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/__longjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/__longjmp.S new file mode 100644 index 0000000000..42127c6ff2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/__longjmp.S @@ -0,0 +1,39 @@ +/* AltiVec/VMX (new) version of __longjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libc-symbols.h> +#include <shlib-compat.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +# define __longjmp_symbol __longjmp +# include "__longjmp-common.S" + +#else /* IS_IN (libc) */ +strong_alias (__vmx__longjmp, __longjmp); +# define __longjmp_symbol __vmx__longjmp +# include "__longjmp-common.S" + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +# define __NO_VMX__ +# undef JB_SIZE +# undef __longjmp_symbol +# define __longjmp_symbol __novmx__longjmp +# include "__longjmp-common.S" +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/a2/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/a2/memcpy.S new file mode 100644 index 0000000000..c795ff48fe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/a2/memcpy.S @@ -0,0 +1,527 @@ +/* Optimized memcpy implementation for PowerPC A2. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Michael Brutman <brutman@us.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define PREFETCH_AHEAD 4 /* no cache lines SRC prefetching ahead */ +#define ZERO_AHEAD 2 /* no cache lines DST zeroing ahead */ + + .machine a2 +EALIGN (memcpy, 5, 0) + CALL_MCOUNT + + dcbt 0,r4 /* Prefetch ONE SRC cacheline */ + cmplwi cr1,r5,16 /* is size < 16 ? */ + mr r6,r3 /* Copy dest reg to r6; */ + blt+ cr1,L(shortcopy) + + + /* Big copy (16 bytes or more) + + Figure out how far to the nearest quadword boundary, or if we are + on one already. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + */ + + neg r8,r3 /* LS 4 bits = # bytes to 8-byte dest bdry */ + clrlwi r8,r8,32-4 /* align to 16byte boundary */ + sub r7,r4,r3 /* compute offset to src from dest */ + cmplwi cr0,r8,0 /* Were we aligned on a 16 byte bdy? */ + beq+ L(dst_aligned) + + + + /* Destination is not aligned on quadword boundary. Get us to one. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + r7 - offset to src from dest + r8 - number of bytes to quadword boundary + */ + + mtcrf 0x01,r8 /* put #bytes to boundary into cr7 */ + subf r5,r8,r5 /* adjust remaining len */ + + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte addr */ + stb r0,0(r6) + addi r6,r6,1 +1: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte addr */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte addr */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+0,8f + lfdx r0,r7,r6 /* copy 8 byte addr */ + stfd r0,0(r6) + addi r6,r6,8 +8: + add r4,r7,r6 /* update src addr */ + + + + /* Dest is quadword aligned now. + + Lots of decisions to make. If we are copying less than a cache + line we won't be here long. If we are not on a cache line + boundary we need to get there. And then we need to figure out + how many cache lines ahead to pre-touch. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + */ + + + .align 4 +L(dst_aligned): + + +#ifdef SHARED + mflr r0 +/* Establishes GOT addressability so we can load __cache_line_size + from static. This value was set from the aux vector during startup. */ + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,__cache_line_size-got_label@ha + lwz r9,__cache_line_size-got_label@l(r9) + mtlr r0 +#else +/* Load __cache_line_size from static. This value was set from the + aux vector during startup. */ + lis r9,__cache_line_size@ha + lwz r9,__cache_line_size@l(r9) +#endif + + cmplwi cr5, r9, 0 + bne+ cr5,L(cachelineset) + +/* __cache_line_size not set: generic byte copy without much optimization */ + andi. r0,r5,1 /* If length is odd copy one byte. */ + beq L(cachelinenotset_align) + lbz r7,0(r4) /* Read one byte from source. */ + addi r5,r5,-1 /* Update length. */ + addi r4,r4,1 /* Update source pointer address. */ + stb r7,0(r6) /* Store one byte on dest. */ + addi r6,r6,1 /* Update dest pointer address. */ +L(cachelinenotset_align): + cmpwi cr7,r5,0 /* If length is 0 return. */ + beqlr cr7 + ori r2,r2,0 /* Force a new dispatch group. */ +L(cachelinenotset_loop): + addic. r5,r5,-2 /* Update length. */ + lbz r7,0(r4) /* Load 2 bytes from source. */ + lbz r8,1(r4) + addi r4,r4,2 /* Update source pointer address. */ + stb r7,0(r6) /* Store 2 bytes on dest. */ + stb r8,1(r6) + addi r6,r6,2 /* Update dest pointer address. */ + bne L(cachelinenotset_loop) + blr + + +L(cachelineset): + + addi r10,r9,-1 + + cmpw cr5,r5,r10 /* Less than a cacheline to go? */ + + neg r7,r6 /* How far to next cacheline bdy? */ + + addi r6,r6,-8 /* prepare for stdu */ + cmpwi cr0,r9,128 + addi r4,r4,-8 /* prepare for ldu */ + + + ble+ cr5,L(lessthancacheline) + + beq- cr0,L(big_lines) /* 128 byte line code */ + + + + + /* More than a cacheline left to go, and using 64 byte cachelines */ + + clrlwi r7,r7,32-6 /* How far to next cacheline bdy? */ + + cmplwi cr6,r7,0 /* Are we on a cacheline bdy already? */ + + /* Reduce total len by what it takes to get to the next cache line */ + subf r5,r7,r5 + srwi r7,r7,4 /* How many qws to get to the line bdy? */ + + /* How many full cache lines to copy after getting to a line bdy? */ + srwi r10,r5,6 + + cmplwi r10,0 /* If no full cache lines to copy ... */ + li r11,0 /* number cachelines to copy with prefetch */ + beq L(nocacheprefetch) + + + /* We are here because we have at least one full cache line to copy, + and therefore some pre-touching to do. */ + + cmplwi r10,PREFETCH_AHEAD + li r12,64+8 /* prefetch distance */ + ble L(lessthanmaxprefetch) + + /* We can only do so much pre-fetching. R11 will have the count of + lines left to prefetch after the initial batch of prefetches + are executed. */ + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +L(lessthanmaxprefetch): + mtctr r10 + + /* At this point r10/ctr hold the number of lines to prefetch in this + initial batch, and r11 holds any remainder. */ + +L(prefetchSRC): + dcbt r12,r4 + addi r12,r12,64 + bdnz L(prefetchSRC) + + + /* Prefetching is done, or was not needed. + + cr6 - are we on a cacheline boundary already? + r7 - number of quadwords to the next cacheline boundary + */ + +L(nocacheprefetch): + mtctr r7 + + cmplwi cr1,r5,64 /* Less than a cache line to copy? */ + + /* How many bytes are left after we copy whatever full + cache lines we can get? */ + clrlwi r5,r5,32-6 + + beq cr6,L(cachelinealigned) + + + /* Copy quadwords up to the next cacheline boundary */ + +L(aligntocacheline): + lfd fp9,0x08(r4) + lfdu fp10,0x10(r4) + stfd fp9,0x08(r6) + stfdu fp10,0x10(r6) + bdnz L(aligntocacheline) + + + .align 4 +L(cachelinealigned): /* copy while cache lines */ + + blt- cr1,L(lessthancacheline) /* size <64 */ + +L(outerloop): + cmpwi r11,0 + mtctr r11 + beq- L(endloop) + + li r11,64*ZERO_AHEAD +8 /* DCBZ dist */ + + .align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +L(loop): /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + lfd fp9, 0x08(r4) + dcbz r11,r6 + lfd fp10, 0x10(r4) + lfd fp11, 0x18(r4) + lfd fp12, 0x20(r4) + stfd fp9, 0x08(r6) + stfd fp10, 0x10(r6) + stfd fp11, 0x18(r6) + stfd fp12, 0x20(r6) + lfd fp9, 0x28(r4) + lfd fp10, 0x30(r4) + lfd fp11, 0x38(r4) + lfdu fp12, 0x40(r4) + stfd fp9, 0x28(r6) + stfd fp10, 0x30(r6) + stfd fp11, 0x38(r6) + stfdu fp12, 0x40(r6) + + bdnz L(loop) + + +L(endloop): + cmpwi r10,0 + beq- L(endloop2) + mtctr r10 + +L(loop2): /* Copy aligned body */ + lfd fp9, 0x08(r4) + lfd fp10, 0x10(r4) + lfd fp11, 0x18(r4) + lfd fp12, 0x20(r4) + stfd fp9, 0x08(r6) + stfd fp10, 0x10(r6) + stfd fp11, 0x18(r6) + stfd fp12, 0x20(r6) + lfd fp9, 0x28(r4) + lfd fp10, 0x30(r4) + lfd fp11, 0x38(r4) + lfdu fp12, 0x40(r4) + stfd fp9, 0x28(r6) + stfd fp10, 0x30(r6) + stfd fp11, 0x38(r6) + stfdu fp12, 0x40(r6) + + bdnz L(loop2) +L(endloop2): + + + .align 4 +L(lessthancacheline): /* Was there less than cache to do ? */ + cmplwi cr0,r5,16 + srwi r7,r5,4 /* divide size by 16 */ + blt- L(do_lt16) + mtctr r7 + +L(copy_remaining): + lfd fp9, 0x08(r4) + lfdu fp10, 0x10(r4) + stfd fp9, 0x08(r6) + stfdu fp10, 0x10(r6) + bdnz L(copy_remaining) + +L(do_lt16): /* less than 16 ? */ + cmplwi cr0,r5,0 /* copy remaining bytes (0-15) */ + beqlr+ /* no rest to copy */ + addi r4,r4,8 + addi r6,r6,8 + +L(shortcopy): /* SIMPLE COPY to handle size =< 15 bytes */ + mtcrf 0x01,r5 + sub r7,r4,r6 + bf- cr7*4+0,8f + lfdx fp9,r7,r6 /* copy 8 byte */ + stfd fp9,0(r6) + addi r6,r6,8 +8: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) +1: + blr + + + + + + /* Similar to above, but for use with 128 byte lines. */ + + +L(big_lines): + + clrlwi r7,r7,32-7 /* How far to next cacheline bdy? */ + + cmplwi cr6,r7,0 /* Are we on a cacheline bdy already? */ + + /* Reduce total len by what it takes to get to the next cache line */ + subf r5,r7,r5 + srwi r7,r7,4 /* How many qw to get to the line bdy? */ + + /* How many full cache lines to copy after getting to a line bdy? */ + srwi r10,r5,7 + + cmplwi r10,0 /* If no full cache lines to copy ... */ + li r11,0 /* number cachelines to copy with prefetch */ + beq L(nocacheprefetch_128) + + + /* We are here because we have at least one full cache line to copy, + and therefore some pre-touching to do. */ + + cmplwi r10,PREFETCH_AHEAD + li r12,128+8 /* prefetch distance */ + ble L(lessthanmaxprefetch_128) + + /* We can only do so much pre-fetching. R11 will have the count of + lines left to prefetch after the initial batch of prefetches + are executed. */ + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +L(lessthanmaxprefetch_128): + mtctr r10 + + /* At this point r10/ctr hold the number of lines to prefetch in this + initial batch, and r11 holds any remainder. */ + +L(prefetchSRC_128): + dcbt r12,r4 + addi r12,r12,128 + bdnz L(prefetchSRC_128) + + + /* Prefetching is done, or was not needed. + + cr6 - are we on a cacheline boundary already? + r7 - number of quadwords to the next cacheline boundary + */ + +L(nocacheprefetch_128): + mtctr r7 + + cmplwi cr1,r5,128 /* Less than a cache line to copy? */ + + /* How many bytes are left after we copy whatever full + cache lines we can get? */ + clrlwi r5,r5,32-7 + + beq cr6,L(cachelinealigned_128) + + + /* Copy quadwords up to the next cacheline boundary */ + +L(aligntocacheline_128): + lfd fp9,0x08(r4) + lfdu fp10,0x10(r4) + stfd fp9,0x08(r6) + stfdu fp10,0x10(r6) + bdnz L(aligntocacheline_128) + + +L(cachelinealigned_128): /* copy while cache lines */ + + blt- cr1,L(lessthancacheline) /* size <128 */ + +L(outerloop_128): + cmpwi r11,0 + mtctr r11 + beq- L(endloop_128) + + li r11,128*ZERO_AHEAD +8 /* DCBZ dist */ + + .align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +L(loop_128): /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + lfd fp9, 0x08(r4) + dcbz r11,r6 + lfd fp10, 0x10(r4) + lfd fp11, 0x18(r4) + lfd fp12, 0x20(r4) + stfd fp9, 0x08(r6) + stfd fp10, 0x10(r6) + stfd fp11, 0x18(r6) + stfd fp12, 0x20(r6) + lfd fp9, 0x28(r4) + lfd fp10, 0x30(r4) + lfd fp11, 0x38(r4) + lfd fp12, 0x40(r4) + stfd fp9, 0x28(r6) + stfd fp10, 0x30(r6) + stfd fp11, 0x38(r6) + stfd fp12, 0x40(r6) + lfd fp9, 0x48(r4) + lfd fp10, 0x50(r4) + lfd fp11, 0x58(r4) + lfd fp12, 0x60(r4) + stfd fp9, 0x48(r6) + stfd fp10, 0x50(r6) + stfd fp11, 0x58(r6) + stfd fp12, 0x60(r6) + lfd fp9, 0x68(r4) + lfd fp10, 0x70(r4) + lfd fp11, 0x78(r4) + lfdu fp12, 0x80(r4) + stfd fp9, 0x68(r6) + stfd fp10, 0x70(r6) + stfd fp11, 0x78(r6) + stfdu fp12, 0x80(r6) + + bdnz L(loop_128) + + +L(endloop_128): + cmpwi r10,0 + beq- L(endloop2_128) + mtctr r10 + +L(loop2_128): /* Copy aligned body */ + lfd fp9, 0x08(r4) + lfd fp10, 0x10(r4) + lfd fp11, 0x18(r4) + lfd fp12, 0x20(r4) + stfd fp9, 0x08(r6) + stfd fp10, 0x10(r6) + stfd fp11, 0x18(r6) + stfd fp12, 0x20(r6) + lfd fp9, 0x28(r4) + lfd fp10, 0x30(r4) + lfd fp11, 0x38(r4) + lfd fp12, 0x40(r4) + stfd fp9, 0x28(r6) + stfd fp10, 0x30(r6) + stfd fp11, 0x38(r6) + stfd fp12, 0x40(r6) + lfd fp9, 0x48(r4) + lfd fp10, 0x50(r4) + lfd fp11, 0x58(r4) + lfd fp12, 0x60(r4) + stfd fp9, 0x48(r6) + stfd fp10, 0x50(r6) + stfd fp11, 0x58(r6) + stfd fp12, 0x60(r6) + lfd fp9, 0x68(r4) + lfd fp10, 0x70(r4) + lfd fp11, 0x78(r4) + lfdu fp12, 0x80(r4) + stfd fp9, 0x68(r6) + stfd fp10, 0x70(r6) + stfd fp11, 0x78(r6) + stfdu fp12, 0x80(r6) + bdnz L(loop2_128) +L(endloop2_128): + + b L(lessthancacheline) + + +END (memcpy) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/add_n.S b/REORG.TODO/sysdeps/powerpc/powerpc32/add_n.S new file mode 100644 index 0000000000..0687be6236 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/add_n.S @@ -0,0 +1,68 @@ +/* Add two limb vectors of equal, non-zero length for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, + mp_size_t size) + Calculate s1+s2 and put result in res_ptr; return carry, 0 or 1. */ + +/* Note on optimisation: This code is optimal for the 601. Almost every other + possible 2-unrolled inner loop will not be. Also, watch out for the + alignment... */ + +EALIGN (__mpn_add_n, 3, 0) + +/* Set up for loop below. */ + mtcrf 0x01,r6 + srwi. r7,r6,1 + li r10,0 + mtctr r7 + bt 31,L(2) + +/* Clear the carry. */ + addic r0,r0,0 +/* Adjust pointers for loop. */ + addi r3,r3,-4 + addi r4,r4,-4 + addi r5,r5,-4 + b L(0) + +L(2): lwz r7,0(r5) + lwz r6,0(r4) + addc r6,r6,r7 + stw r6,0(r3) + beq L(1) + +/* The loop. */ + +/* Align start of loop to an odd word boundary to guarantee that the + last two words can be fetched in one access (for 601). */ +L(0): lwz r9,4(r4) + lwz r8,4(r5) + lwzu r6,8(r4) + lwzu r7,8(r5) + adde r8,r9,r8 + stw r8,4(r3) + adde r6,r6,r7 + stwu r6,8(r3) + bdnz L(0) +/* Return the carry. */ +L(1): addze r3,r10 + blr +END (__mpn_add_n) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/addmul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc32/addmul_1.S new file mode 100644 index 0000000000..f742be21b8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/addmul_1.S @@ -0,0 +1,48 @@ +/* Multiply a limb vector by a single limb, for PowerPC. + Copyright (C) 1993-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, + mp_size_t s1_size, mp_limb_t s2_limb) + Calculate res+s1*s2 and put result back in res; return carry. */ +ENTRY (__mpn_addmul_1) + mtctr r5 + + lwz r0,0(r4) + mullw r7,r0,r6 + mulhwu r10,r0,r6 + lwz r9,0(r3) + addc r8,r7,r9 + addi r3,r3,-4 /* adjust res_ptr */ + bdz L(1) + +L(0): lwzu r0,4(r4) + stwu r8,4(r3) + mullw r8,r0,r6 + adde r7,r8,r10 + mulhwu r10,r0,r6 + lwz r9,4(r3) + addze r10,r10 + addc r8,r7,r9 + bdnz L(0) + +L(1): stw r8,4(r3) + addze r3,r10 + blr +END (__mpn_addmul_1) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/atomic-machine.h b/REORG.TODO/sysdeps/powerpc/powerpc32/atomic-machine.h new file mode 100644 index 0000000000..96c7d81359 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/atomic-machine.h @@ -0,0 +1,126 @@ +/* Atomic operations. PowerPC32 version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* POWER6 adds a "Mutex Hint" to the Load and Reserve instruction. + This is a hint to the hardware to expect additional updates adjacent + to the lock word or not. If we are acquiring a Mutex, the hint + should be true. Otherwise we releasing a Mutex or doing a simple + atomic operation. In that case we don't expect additional updates + adjacent to the lock word after the Store Conditional and the hint + should be false. */ + +#if defined _ARCH_PWR6 || defined _ARCH_PWR6X +# define MUTEX_HINT_ACQ ",1" +# define MUTEX_HINT_REL ",0" +#else +# define MUTEX_HINT_ACQ +# define MUTEX_HINT_REL +#endif + +#define __HAVE_64B_ATOMICS 0 +#define USE_ATOMIC_COMPILER_BUILTINS 0 +#define ATOMIC_EXCHANGE_USES_CAS 1 + +/* + * The 32-bit exchange_bool is different on powerpc64 because the subf + * does signed 64-bit arithmetic while the lwarx is 32-bit unsigned + * (a load word and zero (high 32) form). So powerpc64 has a slightly + * different version in sysdeps/powerpc/powerpc64/atomic-machine.h. + */ +#define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \ +({ \ + unsigned int __tmp; \ + __asm __volatile ( \ + "1: lwarx %0,0,%1" MUTEX_HINT_ACQ "\n" \ + " subf. %0,%2,%0\n" \ + " bne 2f\n" \ + " stwcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp) \ + : "b" (mem), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp != 0; \ +}) + +/* Powerpc32 processors don't implement the 64-bit (doubleword) forms of + load and reserve (ldarx) and store conditional (stdcx.) instructions. + So for powerpc32 we stub out the 64-bit forms. */ +#define __arch_compare_and_exchange_bool_64_acq(mem, newval, oldval) \ + (abort (), 0) + +#define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ + (abort (), (__typeof (*mem)) 0) + +#define __arch_compare_and_exchange_val_64_rel(mem, newval, oldval) \ + (abort (), (__typeof (*mem)) 0) + +#define __arch_atomic_exchange_64_acq(mem, value) \ + ({ abort (); (*mem) = (value); }) + +#define __arch_atomic_exchange_64_rel(mem, value) \ + ({ abort (); (*mem) = (value); }) + +#define __arch_atomic_exchange_and_add_64(mem, value) \ + ({ abort (); (*mem) = (value); }) + +#define __arch_atomic_exchange_and_add_64_acq(mem, value) \ + ({ abort (); (*mem) = (value); }) + +#define __arch_atomic_exchange_and_add_64_rel(mem, value) \ + ({ abort (); (*mem) = (value); }) + +#define __arch_atomic_increment_val_64(mem) \ + ({ abort (); (*mem)++; }) + +#define __arch_atomic_decrement_val_64(mem) \ + ({ abort (); (*mem)--; }) + +#define __arch_atomic_decrement_if_positive_64(mem) \ + ({ abort (); (*mem)--; }) + +#ifdef _ARCH_PWR4 +/* + * Newer powerpc64 processors support the new "light weight" sync (lwsync) + * So if the build is using -mcpu=[power4,power5,power5+,970] we can + * safely use lwsync. + */ +# define atomic_read_barrier() __asm ("lwsync" ::: "memory") +/* + * "light weight" sync can also be used for the release barrier. + */ +# ifndef UP +# define __ARCH_REL_INSTR "lwsync" +# endif +# define atomic_write_barrier() __asm ("lwsync" ::: "memory") +#else +/* + * Older powerpc32 processors don't support the new "light weight" + * sync (lwsync). So the only safe option is to use normal sync + * for all powerpc32 applications. + */ +# define atomic_read_barrier() __asm ("sync" ::: "memory") +# define atomic_write_barrier() __asm ("sync" ::: "memory") +#endif + +/* + * Include the rest of the atomic ops macros which are common to both + * powerpc32 and powerpc64. + */ +#include_next <atomic-machine.h> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/backtrace.c b/REORG.TODO/sysdeps/powerpc/powerpc32/backtrace.c new file mode 100644 index 0000000000..394062136c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/backtrace.c @@ -0,0 +1,131 @@ +/* Return backtrace of current program state. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <execinfo.h> +#include <stddef.h> +#include <string.h> +#include <signal.h> +#include <libc-vdso.h> + +/* This is the stack layout we see with every stack frame. + Note that every routine is required by the ABI to lay out the stack + like this. + + +----------------+ +-----------------+ + %r1 -> | %r1 last frame--------> | %r1 last frame--->... --> NULL + | | | | + | (unused) | | return address | + +----------------+ +-----------------+ +*/ +struct layout +{ + struct layout *next; + void *return_address; +}; + +#define SIGNAL_FRAMESIZE 64 + +/* Since the signal handler is just like any other function it needs to + save/restore its LR and it will save it into callers stack frame. + Since a signal handler doesn't have a caller, the kernel creates a + dummy frame to make it look like it has a caller. */ +struct signal_frame_32 { + char dummy[SIGNAL_FRAMESIZE]; + struct sigcontext sctx; + mcontext_t mctx; + /* We don't care about the rest, since IP value is at 'mctx' field. */ +}; + +static inline int +is_sigtramp_address (void *nip) +{ +#ifdef SHARED + if (nip == VDSO_SYMBOL (sigtramp32)) + return 1; +#endif + return 0; +} + +struct rt_signal_frame_32 { + char dummy[SIGNAL_FRAMESIZE + 16]; + siginfo_t info; + struct ucontext uc; + /* We don't care about the rest, since IP value is at 'uc' field. */ +}; + +static inline int +is_sigtramp_address_rt (void * nip) +{ +#ifdef SHARED + if (nip == VDSO_SYMBOL (sigtramp_rt32)) + return 1; +#endif + return 0; +} + +int +__backtrace (void **array, int size) +{ + struct layout *current; + int count; + + /* Force gcc to spill LR. */ + asm volatile ("" : "=l"(current)); + + /* Get the address on top-of-stack. */ + asm volatile ("lwz %0,0(1)" : "=r"(current)); + + for ( count = 0; + current != NULL && count < size; + current = current->next, count++) + { + gregset_t *gregset = NULL; + + array[count] = current->return_address; + + /* Check if the symbol is the signal trampoline and get the interrupted + * symbol address from the trampoline saved area. */ + if (is_sigtramp_address (current->return_address)) + { + struct signal_frame_32 *sigframe = + (struct signal_frame_32*) current; + gregset = &sigframe->mctx.gregs; + } + else if (is_sigtramp_address_rt (current->return_address)) + { + struct rt_signal_frame_32 *sigframe = + (struct rt_signal_frame_32*) current; + gregset = &sigframe->uc.uc_mcontext.uc_regs->gregs; + } + if (gregset) + { + array[++count] = (void*)((*gregset)[PT_NIP]); + current = (void*)((*gregset)[PT_R1]); + } + } + + /* It's possible the second-last stack frame can't return + (that is, it's __libc_start_main), in which case + the CRT startup code will have set its LR to 'NULL'. */ + if (count > 0 && array[count-1] == NULL) + count--; + + return count; +} +weak_alias (__backtrace, backtrace) +libc_hidden_def (__backtrace) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/bits/wordsize.h b/REORG.TODO/sysdeps/powerpc/powerpc32/bits/wordsize.h new file mode 100644 index 0000000000..04ca9debf0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/bits/wordsize.h @@ -0,0 +1,11 @@ +/* Determine the wordsize from the preprocessor defines. */ + +#if defined __powerpc64__ +# define __WORDSIZE 64 +# define __WORDSIZE_TIME64_COMPAT32 1 +#else +# define __WORDSIZE 32 +# define __WORDSIZE_TIME64_COMPAT32 0 +# define __WORDSIZE32_SIZE_ULONG 0 +# define __WORDSIZE32_PTRDIFF_LONG 0 +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/bsd-_setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/bsd-_setjmp.S new file mode 100644 index 0000000000..169766c304 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/bsd-_setjmp.S @@ -0,0 +1,56 @@ +/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. PowerPC32/64 version. + Copyright (C) 1994-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#include <shlib-compat.h> +#include <libc-symbols.h> +#include <sysdep.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +ENTRY (_setjmp) + li r4,0 /* Set second argument to 0. */ + b __sigsetjmp@local +END (_setjmp) +libc_hidden_def (_setjmp) +#else +/* Build a versioned object for libc. */ + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +compat_symbol (libc, __novmx_setjmp, _setjmp, GLIBC_2_0); + +ENTRY (__novmx_setjmp) + li r4,0 /* Set second argument to 0. */ + b __novmx__sigsetjmp@local +END (__novmx_setjmp) +libc_hidden_def (__novmx_setjmp) +# endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) */ + +versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4) +/* __GI__setjmp prototype is needed for ntpl i.e. _setjmp is defined + as a libc_hidden_proto & is used in sysdeps/generic/libc-start.c + if HAVE_CLEANUP_JMP_BUF is defined */ +ENTRY (__GI__setjmp) + li r4,0 /* Set second argument to 0. */ + b __vmx__sigsetjmp@local +END (__GI__setjmp) + +ENTRY (__vmx_setjmp) + li r4,0 /* Set second argument to 0. */ + b __vmx__sigsetjmp@local +END (__vmx_setjmp) +libc_hidden_def (__vmx_setjmp) +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/bsd-setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/bsd-setjmp.S new file mode 100644 index 0000000000..212d6ce8b1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/bsd-setjmp.S @@ -0,0 +1,39 @@ +/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. PowerPC32/64 version. + Copyright (C) 1994-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#include <shlib-compat.h> +#include <libc-symbols.h> +#include <sysdep.h> + +#if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) + +ENTRY (__novmxsetjmp) + li r4,1 /* Set second argument to 1. */ + b __novmx__sigsetjmp@local +END (__novmxsetjmp) +strong_alias (__novmxsetjmp, __novmx__setjmp) +compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_0) + +#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) ) */ + +ENTRY (__vmxsetjmp) + li r4,1 /* Set second argument to 1. */ + b __vmx__sigsetjmp@local +END (__vmxsetjmp) +strong_alias (__vmxsetjmp, __vmx__setjmp) +strong_alias (__vmx__setjmp, __setjmp) +versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/bzero.S b/REORG.TODO/sysdeps/powerpc/powerpc32/bzero.S new file mode 100644 index 0000000000..2638b12db0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/bzero.S @@ -0,0 +1,27 @@ +/* Optimized bzero `implementation' for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__bzero) + + mr r5,r4 + li r4,0 + b memset@local +END (__bzero) +weak_alias (__bzero, bzero) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/cell/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/cell/memcpy.S new file mode 100644 index 0000000000..a7f761408a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/cell/memcpy.S @@ -0,0 +1,242 @@ +/* Optimized memcpy implementation for CELL BE PowerPC. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define PREFETCH_AHEAD 6 /* no cache lines SRC prefetching ahead */ +#define ZERO_AHEAD 4 /* no cache lines DST zeroing ahead */ + +/* memcpy routine optimized for CELL-BE-PPC v2.0 + * + * The CELL PPC core has 1 integer unit and 1 load/store unit + * CELL: + * 1st level data cache = 32K + * 2nd level data cache = 512K + * 3rd level data cache = 0K + * With 3.2 GHz clockrate the latency to 2nd level cache is >36 clocks, + * latency to memory is >400 clocks + * To improve copy performance we need to prefetch source data + * far ahead to hide this latency + * For best performance instruction forms ending in "." like "andi." + * should be avoided as the are implemented in microcode on CELL. + * The below code is loop unrolled for the CELL cache line of 128 bytes + */ + +.align 7 + +EALIGN (memcpy, 5, 0) + CALL_MCOUNT + + dcbt 0,r4 /* Prefetch ONE SRC cacheline */ + cmplwi cr1,r5,16 /* is size < 16 ? */ + mr r6,r3 + blt+ cr1,.Lshortcopy + +.Lbigcopy: + neg r8,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ + clrlwi r8,r8,32-4 /* align to 16byte boundary */ + sub r7,r4,r3 + cmplwi cr0,r8,0 + beq+ .Ldst_aligned + +.Ldst_unaligned: + mtcrf 0x01,r8 /* put #bytes to boundary into cr7 */ + subf r5,r8,r5 + + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) + addi r6,r6,1 +1: bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: bf cr7*4+0,8f + lfdx fp9,r7,r6 /* copy 8 byte */ + stfd fp9,0(r6) + addi r6,r6,8 +8: + add r4,r7,r6 + +.Ldst_aligned: + + cmpwi cr5,r5,128-1 + + neg r7,r6 + addi r6,r6,-8 /* prepare for stfdu */ + addi r4,r4,-8 /* prepare for lfdu */ + + clrlwi r7,r7,32-7 /* align to cacheline boundary */ + ble+ cr5,.Llessthancacheline + + cmplwi cr6,r7,0 + subf r5,r7,r5 + srwi r7,r7,4 /* divide size by 16 */ + srwi r10,r5,7 /* number of cache lines to copy */ + + cmplwi r10,0 + li r11,0 /* number cachelines to copy with prefetch */ + beq .Lnocacheprefetch + + cmplwi r10,PREFETCH_AHEAD + li r12,128+8 /* prefetch distance */ + ble .Llessthanmaxprefetch + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +.Llessthanmaxprefetch: + mtctr r10 + +.LprefetchSRC: + dcbt r12,r4 + addi r12,r12,128 + bdnz .LprefetchSRC + +.Lnocacheprefetch: + mtctr r7 + cmplwi cr1,r5,128 + clrlwi r5,r5,32-7 + beq cr6,.Lcachelinealigned + +.Laligntocacheline: + lfd fp9,0x08(r4) + lfdu fp10,0x10(r4) + stfd fp9,0x08(r6) + stfdu fp10,0x10(r6) + bdnz .Laligntocacheline + + +.Lcachelinealigned: /* copy while cache lines */ + + blt- cr1,.Llessthancacheline /* size <128 */ + +.Louterloop: + cmpwi r11,0 + mtctr r11 + beq- .Lendloop + + li r11,128*ZERO_AHEAD +8 /* DCBZ dist */ + +.align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +.Lloop: /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + lfd fp9, 0x08(r4) + dcbz r11,r6 + lfd fp10, 0x10(r4) /* 4 register stride copy is optimal */ + lfd fp11, 0x18(r4) /* to hide 1st level cache latency. */ + lfd fp12, 0x20(r4) + stfd fp9, 0x08(r6) + stfd fp10, 0x10(r6) + stfd fp11, 0x18(r6) + stfd fp12, 0x20(r6) + lfd fp9, 0x28(r4) + lfd fp10, 0x30(r4) + lfd fp11, 0x38(r4) + lfd fp12, 0x40(r4) + stfd fp9, 0x28(r6) + stfd fp10, 0x30(r6) + stfd fp11, 0x38(r6) + stfd fp12, 0x40(r6) + lfd fp9, 0x48(r4) + lfd fp10, 0x50(r4) + lfd fp11, 0x58(r4) + lfd fp12, 0x60(r4) + stfd fp9, 0x48(r6) + stfd fp10, 0x50(r6) + stfd fp11, 0x58(r6) + stfd fp12, 0x60(r6) + lfd fp9, 0x68(r4) + lfd fp10, 0x70(r4) + lfd fp11, 0x78(r4) + lfdu fp12, 0x80(r4) + stfd fp9, 0x68(r6) + stfd fp10, 0x70(r6) + stfd fp11, 0x78(r6) + stfdu fp12, 0x80(r6) + + bdnz .Lloop + +.Lendloop: + cmpwi r10,0 + slwi r10,r10,2 /* adjust from 128 to 32 byte stride */ + beq- .Lendloop2 + mtctr r10 + +.Lloop2: /* Copy aligned body */ + lfd fp9, 0x08(r4) + lfd fp10, 0x10(r4) + lfd fp11, 0x18(r4) + lfdu fp12, 0x20(r4) + stfd fp9, 0x08(r6) + stfd fp10, 0x10(r6) + stfd fp11, 0x18(r6) + stfdu fp12, 0x20(r6) + + bdnz .Lloop2 +.Lendloop2: + +.Llessthancacheline: /* less than cache to do ? */ + cmplwi cr0,r5,16 + srwi r7,r5,4 /* divide size by 16 */ + blt- .Ldo_lt16 + mtctr r7 + +.Lcopy_remaining: + lfd fp9,0x08(r4) + lfdu fp10,0x10(r4) + stfd fp9,0x08(r6) + stfdu fp10,0x10(r6) + bdnz .Lcopy_remaining + +.Ldo_lt16: /* less than 16 ? */ + cmplwi cr0,r5,0 /* copy remaining bytes (0-15) */ + beqlr+ /* no rest to copy */ + addi r4,r4,8 + addi r6,r6,8 + +.Lshortcopy: /* SIMPLE COPY to handle size =< 15 bytes */ + mtcrf 0x01,r5 + sub r7,r4,r6 + bf- cr7*4+0,8f + lfdx fp9,r7,r6 /* copy 8 byte */ + stfd fp9,0(r6) + addi r6,r6,8 +8: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) +1: blr + +END (memcpy) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/compat-ppc-mcount.S b/REORG.TODO/sysdeps/powerpc/powerpc32/compat-ppc-mcount.S new file mode 100644 index 0000000000..2a9cb24072 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/compat-ppc-mcount.S @@ -0,0 +1,11 @@ +#include <shlib-compat.h> + +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_15) + + compat_text_section +# define _mcount __compat_mcount +# include "ppc-mcount.S" +# undef _mcount + +compat_symbol (libc, __compat_mcount, _mcount, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/configure b/REORG.TODO/sysdeps/powerpc/powerpc32/configure new file mode 100644 index 0000000000..29cfd53e8b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/configure @@ -0,0 +1,29 @@ +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/powerpc/powerpc32. + +# See whether GCC uses -msecure-plt. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -msecure-plt by default" >&5 +$as_echo_n "checking for -msecure-plt by default... " >&6; } +if ${libc_cv_ppc_secure_plt+:} false; then : + $as_echo_n "(cached) " >&6 +else + echo 'int foo (void) { extern int bar; return bar; }' > conftest.c +libc_cv_ppc_secure_plt=no +if { ac_try='${CC-cc} -S $CFLAGS conftest.c -fpic -o conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + if grep '_GLOBAL_OFFSET_TABLE_-.*@ha' conftest.s > /dev/null 2>&1; then + libc_cv_ppc_secure_plt=yes + fi +fi +rm -rf conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc_secure_plt" >&5 +$as_echo "$libc_cv_ppc_secure_plt" >&6; } +if test $libc_cv_ppc_secure_plt = yes; then + $as_echo "#define HAVE_PPC_SECURE_PLT 1" >>confdefs.h + +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/configure.ac b/REORG.TODO/sysdeps/powerpc/powerpc32/configure.ac new file mode 100644 index 0000000000..5d3a9b509d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/configure.ac @@ -0,0 +1,16 @@ +GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. +# Local configure fragment for sysdeps/powerpc/powerpc32. + +# See whether GCC uses -msecure-plt. +AC_CACHE_CHECK(for -msecure-plt by default, libc_cv_ppc_secure_plt, [dnl +echo 'int foo (void) { extern int bar; return bar; }' > conftest.c +libc_cv_ppc_secure_plt=no +if AC_TRY_COMMAND(${CC-cc} -S $CFLAGS conftest.c -fpic -o conftest.s 1>&AS_MESSAGE_LOG_FD); then + if grep '_GLOBAL_OFFSET_TABLE_-.*@ha' conftest.s > /dev/null 2>&1; then + libc_cv_ppc_secure_plt=yes + fi +fi +rm -rf conftest*]) +if test $libc_cv_ppc_secure_plt = yes; then + AC_DEFINE(HAVE_PPC_SECURE_PLT) +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/crti.S b/REORG.TODO/sysdeps/powerpc/powerpc32/crti.S new file mode 100644 index 0000000000..50b02630c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/crti.S @@ -0,0 +1,89 @@ +/* Special .init and .fini section support for PowerPC. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crti.S puts a function prologue at the beginning of the .init and + .fini sections and defines global symbols for those addresses, so + they can be called as functions. The symbols _init and _fini are + magic and cause the linker to emit DT_INIT and DT_FINI. */ + +#include <libc-symbols.h> +#include <sysdep.h> + +#ifndef PREINIT_FUNCTION +# define PREINIT_FUNCTION __gmon_start__ +#endif + +#ifndef PREINIT_FUNCTION_WEAK +# define PREINIT_FUNCTION_WEAK 1 +#endif + +#if PREINIT_FUNCTION_WEAK + weak_extern (PREINIT_FUNCTION) +#else + .hidden PREINIT_FUNCTION +#endif + + .section .init,"ax",@progbits + .align 2 + .globl _init + .type _init, @function +_init: + stwu r1, -16(r1) + mflr r0 + stw r0, 20(r1) + stw r30, 8(r1) + SETUP_GOT_ACCESS (r30, .Lgot_label_i) + addis r30, r30, _GLOBAL_OFFSET_TABLE_-.Lgot_label_i@ha + addi r30, r30, _GLOBAL_OFFSET_TABLE_-.Lgot_label_i@l +#if PREINIT_FUNCTION_WEAK + lwz r0, PREINIT_FUNCTION@got(r30) + cmpwi cr7, r0, 0 + beq+ cr7, 1f + bl PREINIT_FUNCTION@plt +1: +#else + bl PREINIT_FUNCTION@local +#endif + + .section .fini,"ax",@progbits + .align 2 + .globl _fini + .type _fini, @function +_fini: + stwu r1, -16(r1) + mflr r0 + stw r0, 20(r1) + stw r30, 8(r1) + SETUP_GOT_ACCESS (r30, .Lgot_label_f) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/crtn.S b/REORG.TODO/sysdeps/powerpc/powerpc32/crtn.S new file mode 100644 index 0000000000..67be2950fe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/crtn.S @@ -0,0 +1,53 @@ +/* Special .init and .fini section support for PowerPC. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crtn.S puts function epilogues in the .init and .fini sections + corresponding to the prologues in crti.S. */ + +#include <sysdep.h> + + .section .init,"ax",@progbits + lwz r0, 20(r1) + mtlr r0 + lwz r30, 8(r1) + addi r1, r1, 16 + blr + + .section .fini,"ax",@progbits + lwz r0, 20(r1) + mtlr r0 + lwz r30, 8(r1) + addi r1, r1, 16 + blr diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/dl-dtprocnum.h b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-dtprocnum.h new file mode 100644 index 0000000000..7fe2be7939 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-dtprocnum.h @@ -0,0 +1,3 @@ +/* Number of extra dynamic section entries for this architecture. By + default there are none. */ +#define DT_THISPROCNUM DT_PPC_NUM diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/dl-irel.h b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-irel.h new file mode 100644 index 0000000000..e5d6540ce4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-irel.h @@ -0,0 +1,52 @@ +/* Machine-dependent ELF indirect relocation inline functions. + PowerPC version. + Copyright (C) 2009-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _DL_IREL_H +#define _DL_IREL_H + +#include <stdio.h> +#include <unistd.h> +#include <ldsodefs.h> + +#define ELF_MACHINE_IRELA 1 + +static inline Elf32_Addr +__attribute ((always_inline)) +elf_ifunc_invoke (Elf32_Addr addr) +{ + return ((Elf32_Addr (*) (unsigned long int)) (addr)) (GLRO(dl_hwcap)); +} + +static inline void +__attribute ((always_inline)) +elf_irela (const Elf32_Rela *reloc) +{ + unsigned int r_type = ELF32_R_TYPE (reloc->r_info); + + if (__glibc_likely (r_type == R_PPC_IRELATIVE)) + { + Elf32_Addr *const reloc_addr = (void *) reloc->r_offset; + Elf32_Addr value = elf_ifunc_invoke(reloc->r_addend); + *reloc_addr = value; + } + else + __libc_fatal ("unexpected reloc type in static binary"); +} + +#endif /* dl-irel.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/dl-machine.c b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-machine.c new file mode 100644 index 0000000000..2d6a576552 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-machine.c @@ -0,0 +1,608 @@ +/* Machine-dependent ELF dynamic relocation functions. PowerPC version. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <unistd.h> +#include <string.h> +#include <sys/param.h> +#include <link.h> +#include <ldsodefs.h> +#include <elf/dynamic-link.h> +#include <dl-machine.h> +#include <_itoa.h> + +/* The value __cache_line_size is defined in dl-sysdep.c and is initialised + by _dl_sysdep_start via DL_PLATFORM_INIT. */ +extern int __cache_line_size attribute_hidden; + + +/* Stuff for the PLT. */ +#define PLT_INITIAL_ENTRY_WORDS 18 +#define PLT_LONGBRANCH_ENTRY_WORDS 0 +#define PLT_TRAMPOLINE_ENTRY_WORDS 6 +#define PLT_DOUBLE_SIZE (1<<13) +#define PLT_ENTRY_START_WORDS(entry_number) \ + (PLT_INITIAL_ENTRY_WORDS + (entry_number)*2 \ + + ((entry_number) > PLT_DOUBLE_SIZE \ + ? ((entry_number) - PLT_DOUBLE_SIZE)*2 \ + : 0)) +#define PLT_DATA_START_WORDS(num_entries) PLT_ENTRY_START_WORDS(num_entries) + +/* Macros to build PowerPC opcode words. */ +#define OPCODE_ADDI(rd,ra,simm) \ + (0x38000000 | (rd) << 21 | (ra) << 16 | ((simm) & 0xffff)) +#define OPCODE_ADDIS(rd,ra,simm) \ + (0x3c000000 | (rd) << 21 | (ra) << 16 | ((simm) & 0xffff)) +#define OPCODE_ADD(rd,ra,rb) \ + (0x7c000214 | (rd) << 21 | (ra) << 16 | (rb) << 11) +#define OPCODE_B(target) (0x48000000 | ((target) & 0x03fffffc)) +#define OPCODE_BA(target) (0x48000002 | ((target) & 0x03fffffc)) +#define OPCODE_BCTR() 0x4e800420 +#define OPCODE_LWZ(rd,d,ra) \ + (0x80000000 | (rd) << 21 | (ra) << 16 | ((d) & 0xffff)) +#define OPCODE_LWZU(rd,d,ra) \ + (0x84000000 | (rd) << 21 | (ra) << 16 | ((d) & 0xffff)) +#define OPCODE_MTCTR(rd) (0x7C0903A6 | (rd) << 21) +#define OPCODE_RLWINM(ra,rs,sh,mb,me) \ + (0x54000000 | (rs) << 21 | (ra) << 16 | (sh) << 11 | (mb) << 6 | (me) << 1) + +#define OPCODE_LI(rd,simm) OPCODE_ADDI(rd,0,simm) +#define OPCODE_ADDIS_HI(rd,ra,value) \ + OPCODE_ADDIS(rd,ra,((value) + 0x8000) >> 16) +#define OPCODE_LIS_HI(rd,value) OPCODE_ADDIS_HI(rd,0,value) +#define OPCODE_SLWI(ra,rs,sh) OPCODE_RLWINM(ra,rs,sh,0,31-sh) + + +#define PPC_DCBST(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory") +#define PPC_SYNC asm volatile ("sync" : : : "memory") +#define PPC_ISYNC asm volatile ("sync; isync" : : : "memory") +#define PPC_ICBI(where) asm volatile ("icbi 0,%0" : : "r"(where) : "memory") +#define PPC_DIE asm volatile ("tweq 0,0") + +/* Use this when you've modified some code, but it won't be in the + instruction fetch queue (or when it doesn't matter if it is). */ +#define MODIFIED_CODE_NOQUEUE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); } while (0) +/* Use this when it might be in the instruction queue. */ +#define MODIFIED_CODE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); PPC_ISYNC; } while (0) + + +/* The idea here is that to conform to the ABI, we are supposed to try + to load dynamic objects between 0x10000 (we actually use 0x40000 as + the lower bound, to increase the chance of a memory reference from + a null pointer giving a segfault) and the program's load address; + this may allow us to use a branch instruction in the PLT rather + than a computed jump. The address is only used as a preference for + mmap, so if we get it wrong the worst that happens is that it gets + mapped somewhere else. */ + +ElfW(Addr) +__elf_preferred_address (struct link_map *loader, size_t maplength, + ElfW(Addr) mapstartpref) +{ + ElfW(Addr) low, high; + struct link_map *l; + Lmid_t nsid; + + /* If the object has a preference, load it there! */ + if (mapstartpref != 0) + return mapstartpref; + + /* Otherwise, quickly look for a suitable gap between 0x3FFFF and + 0x70000000. 0x3FFFF is so that references off NULL pointers will + cause a segfault, 0x70000000 is just paranoia (it should always + be superseded by the program's load address). */ + low = 0x0003FFFF; + high = 0x70000000; + for (nsid = 0; nsid < DL_NNS; ++nsid) + for (l = GL(dl_ns)[nsid]._ns_loaded; l; l = l->l_next) + { + ElfW(Addr) mapstart, mapend; + mapstart = l->l_map_start & ~(GLRO(dl_pagesize) - 1); + mapend = l->l_map_end | (GLRO(dl_pagesize) - 1); + assert (mapend > mapstart); + + /* Prefer gaps below the main executable, note that l == + _dl_loaded does not work for static binaries loading + e.g. libnss_*.so. */ + if ((mapend >= high || l->l_type == lt_executable) + && high >= mapstart) + high = mapstart; + else if (mapend >= low && low >= mapstart) + low = mapend; + else if (high >= mapend && mapstart >= low) + { + if (high - mapend >= mapstart - low) + low = mapend; + else + high = mapstart; + } + } + + high -= 0x10000; /* Allow some room between objects. */ + maplength = (maplength | (GLRO(dl_pagesize) - 1)) + 1; + if (high <= low || high - low < maplength ) + return 0; + return high - maplength; /* Both high and maplength are page-aligned. */ +} + +/* Set up the loaded object described by L so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. + Also install a small trampoline to be used by entries that have + been relocated to an address too far away for a single branch. */ + +/* There are many kinds of PLT entries: + + (1) A direct jump to the actual routine, either a relative or + absolute branch. These are set up in __elf_machine_fixup_plt. + + (2) Short lazy entries. These cover the first 8192 slots in + the PLT, and look like (where 'index' goes from 0 to 8191): + + li %r11, index*4 + b &plt[PLT_TRAMPOLINE_ENTRY_WORDS+1] + + (3) Short indirect jumps. These replace (2) when a direct jump + wouldn't reach. They look the same except that the branch + is 'b &plt[PLT_LONGBRANCH_ENTRY_WORDS]'. + + (4) Long lazy entries. These cover the slots when a short entry + won't fit ('index*4' overflows its field), and look like: + + lis %r11, %hi(index*4 + &plt[PLT_DATA_START_WORDS]) + lwzu %r12, %r11, %lo(index*4 + &plt[PLT_DATA_START_WORDS]) + b &plt[PLT_TRAMPOLINE_ENTRY_WORDS] + bctr + + (5) Long indirect jumps. These replace (4) when a direct jump + wouldn't reach. They look like: + + lis %r11, %hi(index*4 + &plt[PLT_DATA_START_WORDS]) + lwz %r12, %r11, %lo(index*4 + &plt[PLT_DATA_START_WORDS]) + mtctr %r12 + bctr + + (6) Long direct jumps. These are used when thread-safety is not + required. They look like: + + lis %r12, %hi(finaladdr) + addi %r12, %r12, %lo(finaladdr) + mtctr %r12 + bctr + + + The lazy entries, (2) and (4), are set up here in + __elf_machine_runtime_setup. (1), (3), and (5) are set up in + __elf_machine_fixup_plt. (1), (3), and (6) can also be constructed + in __process_machine_rela. + + The reason for the somewhat strange construction of the long + entries, (4) and (5), is that we need to ensure thread-safety. For + (1) and (3), this is obvious because only one instruction is + changed and the PPC architecture guarantees that aligned stores are + atomic. For (5), this is more tricky. When changing (4) to (5), + the `b' instruction is first changed to `mtctr'; this is safe + and is why the `lwzu' instruction is not just a simple `addi'. + Once this is done, and is visible to all processors, the `lwzu' can + safely be changed to a `lwz'. */ +int +__elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) +{ + if (map->l_info[DT_JMPREL]) + { + Elf32_Word i; + Elf32_Word *plt = (Elf32_Word *) D_PTR (map, l_info[DT_PLTGOT]); + Elf32_Word num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof (Elf32_Rela)); + Elf32_Word rel_offset_words = PLT_DATA_START_WORDS (num_plt_entries); + Elf32_Word data_words = (Elf32_Word) (plt + rel_offset_words); + Elf32_Word size_modified; + + extern void _dl_runtime_resolve (void); + extern void _dl_prof_resolve (void); + + /* Convert the index in r11 into an actual address, and get the + word at that address. */ + plt[PLT_LONGBRANCH_ENTRY_WORDS] = OPCODE_ADDIS_HI (11, 11, data_words); + plt[PLT_LONGBRANCH_ENTRY_WORDS + 1] = OPCODE_LWZ (11, data_words, 11); + + /* Call the procedure at that address. */ + plt[PLT_LONGBRANCH_ENTRY_WORDS + 2] = OPCODE_MTCTR (11); + plt[PLT_LONGBRANCH_ENTRY_WORDS + 3] = OPCODE_BCTR (); + + if (lazy) + { + Elf32_Word *tramp = plt + PLT_TRAMPOLINE_ENTRY_WORDS; + Elf32_Word dlrr; + Elf32_Word offset; + +#ifndef PROF + dlrr = (Elf32_Word) (profile + ? _dl_prof_resolve + : _dl_runtime_resolve); + if (profile && GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), map)) + /* This is the object we are looking for. Say that we really + want profiling and the timers are started. */ + GL(dl_profile_map) = map; +#else + dlrr = (Elf32_Word) _dl_runtime_resolve; +#endif + + /* For the long entries, subtract off data_words. */ + tramp[0] = OPCODE_ADDIS_HI (11, 11, -data_words); + tramp[1] = OPCODE_ADDI (11, 11, -data_words); + + /* Multiply index of entry by 3 (in r11). */ + tramp[2] = OPCODE_SLWI (12, 11, 1); + tramp[3] = OPCODE_ADD (11, 12, 11); + if (dlrr <= 0x01fffffc || dlrr >= 0xfe000000) + { + /* Load address of link map in r12. */ + tramp[4] = OPCODE_LI (12, (Elf32_Word) map); + tramp[5] = OPCODE_ADDIS_HI (12, 12, (Elf32_Word) map); + + /* Call _dl_runtime_resolve. */ + tramp[6] = OPCODE_BA (dlrr); + } + else + { + /* Get address of _dl_runtime_resolve in CTR. */ + tramp[4] = OPCODE_LI (12, dlrr); + tramp[5] = OPCODE_ADDIS_HI (12, 12, dlrr); + tramp[6] = OPCODE_MTCTR (12); + + /* Load address of link map in r12. */ + tramp[7] = OPCODE_LI (12, (Elf32_Word) map); + tramp[8] = OPCODE_ADDIS_HI (12, 12, (Elf32_Word) map); + + /* Call _dl_runtime_resolve. */ + tramp[9] = OPCODE_BCTR (); + } + + /* Set up the lazy PLT entries. */ + offset = PLT_INITIAL_ENTRY_WORDS; + i = 0; + while (i < num_plt_entries && i < PLT_DOUBLE_SIZE) + { + plt[offset ] = OPCODE_LI (11, i * 4); + plt[offset+1] = OPCODE_B ((PLT_TRAMPOLINE_ENTRY_WORDS + 2 + - (offset+1)) + * 4); + i++; + offset += 2; + } + while (i < num_plt_entries) + { + plt[offset ] = OPCODE_LIS_HI (11, i * 4 + data_words); + plt[offset+1] = OPCODE_LWZU (12, i * 4 + data_words, 11); + plt[offset+2] = OPCODE_B ((PLT_TRAMPOLINE_ENTRY_WORDS + - (offset+2)) + * 4); + plt[offset+3] = OPCODE_BCTR (); + i++; + offset += 4; + } + } + + /* Now, we've modified code. We need to write the changes from + the data cache to a second-level unified cache, then make + sure that stale data in the instruction cache is removed. + (In a multiprocessor system, the effect is more complex.) + Most of the PLT shouldn't be in the instruction cache, but + there may be a little overlap at the start and the end. + + Assumes that dcbst and icbi apply to lines of 16 bytes or + more. Current known line sizes are 16, 32, and 128 bytes. + The following gets the __cache_line_size, when available. */ + + /* Default minimum 4 words per cache line. */ + int line_size_words = 4; + + if (lazy && __cache_line_size != 0) + /* Convert bytes to words. */ + line_size_words = __cache_line_size / 4; + + size_modified = lazy ? rel_offset_words : 6; + for (i = 0; i < size_modified; i += line_size_words) + PPC_DCBST (plt + i); + PPC_DCBST (plt + size_modified - 1); + PPC_SYNC; + + for (i = 0; i < size_modified; i += line_size_words) + PPC_ICBI (plt + i); + PPC_ICBI (plt + size_modified - 1); + PPC_ISYNC; + } + + return lazy; +} + +Elf32_Addr +__elf_machine_fixup_plt (struct link_map *map, + Elf32_Addr *reloc_addr, Elf32_Addr finaladdr) +{ + Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr; + if (delta << 6 >> 6 == delta) + *reloc_addr = OPCODE_B (delta); + else if (finaladdr <= 0x01fffffc || finaladdr >= 0xfe000000) + *reloc_addr = OPCODE_BA (finaladdr); + else + { + Elf32_Word *plt, *data_words; + Elf32_Word index, offset, num_plt_entries; + + num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof(Elf32_Rela)); + plt = (Elf32_Word *) D_PTR (map, l_info[DT_PLTGOT]); + offset = reloc_addr - plt; + index = (offset - PLT_INITIAL_ENTRY_WORDS)/2; + data_words = plt + PLT_DATA_START_WORDS (num_plt_entries); + + reloc_addr += 1; + + if (index < PLT_DOUBLE_SIZE) + { + data_words[index] = finaladdr; + PPC_SYNC; + *reloc_addr = OPCODE_B ((PLT_LONGBRANCH_ENTRY_WORDS - (offset+1)) + * 4); + } + else + { + index -= (index - PLT_DOUBLE_SIZE)/2; + + data_words[index] = finaladdr; + PPC_SYNC; + + reloc_addr[1] = OPCODE_MTCTR (12); + MODIFIED_CODE_NOQUEUE (reloc_addr + 1); + PPC_SYNC; + + reloc_addr[0] = OPCODE_LWZ (12, + (Elf32_Word) (data_words + index), 11); + } + } + MODIFIED_CODE (reloc_addr); + return finaladdr; +} + +void +_dl_reloc_overflow (struct link_map *map, + const char *name, + Elf32_Addr *const reloc_addr, + const Elf32_Sym *refsym) +{ + char buffer[128]; + char *t; + t = stpcpy (buffer, name); + t = stpcpy (t, " relocation at 0x00000000"); + _itoa_word ((unsigned) reloc_addr, t, 16, 0); + if (refsym) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + t = stpcpy (t, " for symbol `"); + t = stpcpy (t, strtab + refsym->st_name); + t = stpcpy (t, "'"); + } + t = stpcpy (t, " out of range"); + _dl_signal_error (0, map->l_name, NULL, buffer); +} + +void +__process_machine_rela (struct link_map *map, + const Elf32_Rela *reloc, + struct link_map *sym_map, + const Elf32_Sym *sym, + const Elf32_Sym *refsym, + Elf32_Addr *const reloc_addr, + Elf32_Addr const finaladdr, + int rinfo) +{ + union unaligned + { + uint16_t u2; + uint32_t u4; + } __attribute__((__packed__)); + + switch (rinfo) + { + case R_PPC_NONE: + return; + + case R_PPC_ADDR32: + case R_PPC_GLOB_DAT: + case R_PPC_RELATIVE: + *reloc_addr = finaladdr; + return; + + case R_PPC_IRELATIVE: + *reloc_addr = ((Elf32_Addr (*) (void)) finaladdr) (); + return; + + case R_PPC_UADDR32: + ((union unaligned *) reloc_addr)->u4 = finaladdr; + break; + + case R_PPC_ADDR24: + if (__glibc_unlikely (finaladdr > 0x01fffffc && finaladdr < 0xfe000000)) + _dl_reloc_overflow (map, "R_PPC_ADDR24", reloc_addr, refsym); + *reloc_addr = (*reloc_addr & 0xfc000003) | (finaladdr & 0x3fffffc); + break; + + case R_PPC_ADDR16: + if (__glibc_unlikely (finaladdr > 0x7fff && finaladdr < 0xffff8000)) + _dl_reloc_overflow (map, "R_PPC_ADDR16", reloc_addr, refsym); + *(Elf32_Half*) reloc_addr = finaladdr; + break; + + case R_PPC_UADDR16: + if (__glibc_unlikely (finaladdr > 0x7fff && finaladdr < 0xffff8000)) + _dl_reloc_overflow (map, "R_PPC_UADDR16", reloc_addr, refsym); + ((union unaligned *) reloc_addr)->u2 = finaladdr; + break; + + case R_PPC_ADDR16_LO: + *(Elf32_Half*) reloc_addr = finaladdr; + break; + + case R_PPC_ADDR16_HI: + *(Elf32_Half*) reloc_addr = finaladdr >> 16; + break; + + case R_PPC_ADDR16_HA: + *(Elf32_Half*) reloc_addr = (finaladdr + 0x8000) >> 16; + break; + + case R_PPC_ADDR14: + case R_PPC_ADDR14_BRTAKEN: + case R_PPC_ADDR14_BRNTAKEN: + if (__glibc_unlikely (finaladdr > 0x7fff && finaladdr < 0xffff8000)) + _dl_reloc_overflow (map, "R_PPC_ADDR14", reloc_addr, refsym); + *reloc_addr = (*reloc_addr & 0xffff0003) | (finaladdr & 0xfffc); + if (rinfo != R_PPC_ADDR14) + *reloc_addr = ((*reloc_addr & 0xffdfffff) + | ((rinfo == R_PPC_ADDR14_BRTAKEN) + ^ (finaladdr >> 31)) << 21); + break; + + case R_PPC_REL24: + { + Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr; + if (delta << 6 >> 6 != delta) + _dl_reloc_overflow (map, "R_PPC_REL24", reloc_addr, refsym); + *reloc_addr = (*reloc_addr & 0xfc000003) | (delta & 0x3fffffc); + } + break; + + case R_PPC_COPY: + if (sym == NULL) + /* This can happen in trace mode when an object could not be + found. */ + return; + if (sym->st_size > refsym->st_size + || (GLRO(dl_verbose) && sym->st_size < refsym->st_size)) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + _dl_error_printf ("\ +%s: Symbol `%s' has different size in shared object, consider re-linking\n", + RTLD_PROGNAME, strtab + refsym->st_name); + } + memcpy (reloc_addr, (char *) finaladdr, MIN (sym->st_size, + refsym->st_size)); + return; + + case R_PPC_REL32: + *reloc_addr = finaladdr - (Elf32_Word) reloc_addr; + return; + + case R_PPC_JMP_SLOT: + /* It used to be that elf_machine_fixup_plt was used here, + but that doesn't work when ld.so relocates itself + for the second time. On the bright side, there's + no need to worry about thread-safety here. */ + { + Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr; + if (delta << 6 >> 6 == delta) + *reloc_addr = OPCODE_B (delta); + else if (finaladdr <= 0x01fffffc || finaladdr >= 0xfe000000) + *reloc_addr = OPCODE_BA (finaladdr); + else + { + Elf32_Word *plt, *data_words; + Elf32_Word index, offset, num_plt_entries; + + plt = (Elf32_Word *) D_PTR (map, l_info[DT_PLTGOT]); + offset = reloc_addr - plt; + + if (offset < PLT_DOUBLE_SIZE*2 + PLT_INITIAL_ENTRY_WORDS) + { + index = (offset - PLT_INITIAL_ENTRY_WORDS)/2; + num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof(Elf32_Rela)); + data_words = plt + PLT_DATA_START_WORDS (num_plt_entries); + data_words[index] = finaladdr; + reloc_addr[0] = OPCODE_LI (11, index * 4); + reloc_addr[1] = OPCODE_B ((PLT_LONGBRANCH_ENTRY_WORDS + - (offset+1)) + * 4); + MODIFIED_CODE_NOQUEUE (reloc_addr + 1); + } + else + { + reloc_addr[0] = OPCODE_LIS_HI (12, finaladdr); + reloc_addr[1] = OPCODE_ADDI (12, 12, finaladdr); + reloc_addr[2] = OPCODE_MTCTR (12); + reloc_addr[3] = OPCODE_BCTR (); + MODIFIED_CODE_NOQUEUE (reloc_addr + 3); + } + } + } + break; + +#define DO_TLS_RELOC(suffix) \ + case R_PPC_DTPREL##suffix: \ + /* During relocation all TLS symbols are defined and used. \ + Therefore the offset is already correct. */ \ + if (sym_map != NULL) \ + do_reloc##suffix ("R_PPC_DTPREL"#suffix, \ + TLS_DTPREL_VALUE (sym, reloc)); \ + break; \ + case R_PPC_TPREL##suffix: \ + if (sym_map != NULL) \ + { \ + CHECK_STATIC_TLS (map, sym_map); \ + do_reloc##suffix ("R_PPC_TPREL"#suffix, \ + TLS_TPREL_VALUE (sym_map, sym, reloc)); \ + } \ + break; + + inline void do_reloc16 (const char *r_name, Elf32_Addr value) + { + if (__glibc_unlikely (value > 0x7fff && value < 0xffff8000)) + _dl_reloc_overflow (map, r_name, reloc_addr, refsym); + *(Elf32_Half *) reloc_addr = value; + } + inline void do_reloc16_LO (const char *r_name, Elf32_Addr value) + { + *(Elf32_Half *) reloc_addr = value; + } + inline void do_reloc16_HI (const char *r_name, Elf32_Addr value) + { + *(Elf32_Half *) reloc_addr = value >> 16; + } + inline void do_reloc16_HA (const char *r_name, Elf32_Addr value) + { + *(Elf32_Half *) reloc_addr = (value + 0x8000) >> 16; + } + DO_TLS_RELOC (16) + DO_TLS_RELOC (16_LO) + DO_TLS_RELOC (16_HI) + DO_TLS_RELOC (16_HA) + + default: + _dl_reloc_bad_type (map, rinfo, 0); + return; + } + + MODIFIED_CODE_NOQUEUE (reloc_addr); +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/dl-machine.h b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-machine.h new file mode 100644 index 0000000000..28eb50f92d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-machine.h @@ -0,0 +1,455 @@ +/* Machine-dependent ELF dynamic relocation inline functions. PowerPC version. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef dl_machine_h +#define dl_machine_h + +#define ELF_MACHINE_NAME "powerpc" + +#include <assert.h> +#include <dl-tls.h> +#include <dl-irel.h> +#include <hwcapinfo.h> + +/* Translate a processor specific dynamic tag to the index + in l_info array. */ +#define DT_PPC(x) (DT_PPC_##x - DT_LOPROC + DT_NUM) + +/* Return nonzero iff ELF header is compatible with the running host. */ +static inline int +elf_machine_matches_host (const Elf32_Ehdr *ehdr) +{ + return ehdr->e_machine == EM_PPC; +} + +/* Return the value of the GOT pointer. */ +static inline Elf32_Addr * __attribute__ ((const)) +ppc_got (void) +{ + Elf32_Addr *got; + + asm ("bcl 20,31,1f\n" + "1: mflr %0\n" + " addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n" + " addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n" + : "=b" (got) : : "lr"); + + return got; +} + +/* Return the link-time address of _DYNAMIC, stored as + the first value in the GOT. */ +static inline Elf32_Addr __attribute__ ((const)) +elf_machine_dynamic (void) +{ + return *ppc_got (); +} + +/* Return the run-time load address of the shared object. */ +static inline Elf32_Addr __attribute__ ((const)) +elf_machine_load_address (void) +{ + Elf32_Addr *branchaddr; + Elf32_Addr runtime_dynamic; + + /* This is much harder than you'd expect. Possibly I'm missing something. + The 'obvious' way: + + Apparently, "bcl 20,31,$+4" is what should be used to load LR + with the address of the next instruction. + I think this is so that machines that do bl/blr pairing don't + get confused. + + asm ("bcl 20,31,0f ;" + "0: mflr 0 ;" + "lis %0,0b@ha;" + "addi %0,%0,0b@l;" + "subf %0,%0,0" + : "=b" (addr) : : "r0", "lr"); + + doesn't work, because the linker doesn't have to (and in fact doesn't) + update the @ha and @l references; the loader (which runs after this + code) will do that. + + Instead, we use the following trick: + + The linker puts the _link-time_ address of _DYNAMIC at the first + word in the GOT. We could branch to that address, if we wanted, + by using an @local reloc; the linker works this out, so it's safe + to use now. We can't, of course, actually branch there, because + we'd cause an illegal instruction exception; so we need to compute + the address ourselves. That gives us the following code: */ + + /* Get address of the 'b _DYNAMIC@local'... */ + asm ("bcl 20,31,0f;" + "b _DYNAMIC@local;" + "0:" + : "=l" (branchaddr)); + + /* So now work out the difference between where the branch actually points, + and the offset of that location in memory from the start of the file. */ + runtime_dynamic = ((Elf32_Addr) branchaddr + + ((Elf32_Sword) (*branchaddr << 6 & 0xffffff00) >> 6)); + + return runtime_dynamic - elf_machine_dynamic (); +} + +#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */ + +/* The PLT uses Elf32_Rela relocs. */ +#define elf_machine_relplt elf_machine_rela + +/* Mask identifying addresses reserved for the user program, + where the dynamic linker should not map anything. */ +#define ELF_MACHINE_USER_ADDRESS_MASK 0xf0000000UL + +/* The actual _start code is in dl-start.S. Use a really + ugly bit of assembler to let dl-start.o see _dl_start. */ +#define RTLD_START asm (".globl _dl_start"); + +/* Decide where a relocatable object should be loaded. */ +extern ElfW(Addr) +__elf_preferred_address(struct link_map *loader, size_t maplength, + ElfW(Addr) mapstartpref); +#define ELF_PREFERRED_ADDRESS(loader, maplength, mapstartpref) \ + __elf_preferred_address (loader, maplength, mapstartpref) + +/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry, so + PLT entries should not be allowed to define the value. + ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve to one + of the main executable's symbols, as for a COPY reloc. */ +/* We never want to use a PLT entry as the destination of a + reloc, when what is being relocated is a branch. This is + partly for efficiency, but mostly so we avoid loops. */ +#define elf_machine_type_class(type) \ + ((((type) == R_PPC_JMP_SLOT \ + || (type) == R_PPC_REL24 \ + || ((type) >= R_PPC_DTPMOD32 /* contiguous TLS */ \ + && (type) <= R_PPC_DTPREL32) \ + || (type) == R_PPC_ADDR24) * ELF_RTYPE_CLASS_PLT) \ + | (((type) == R_PPC_COPY) * ELF_RTYPE_CLASS_COPY)) + +/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ +#define ELF_MACHINE_JMP_SLOT R_PPC_JMP_SLOT + +/* The PowerPC never uses REL relocations. */ +#define ELF_MACHINE_NO_REL 1 +#define ELF_MACHINE_NO_RELA 0 + +/* We define an initialization function to initialize HWCAP/HWCAP2 and + platform data so it can be copied into the TCB later. This is called + very early in _dl_sysdep_start for dynamically linked binaries. */ +#ifdef SHARED +# define DL_PLATFORM_INIT dl_platform_init () + +static inline void __attribute__ ((unused)) +dl_platform_init (void) +{ + __tcb_parse_hwcap_and_convert_at_platform (); +} +#endif + +/* Set up the loaded object described by MAP so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. + Also install a small trampoline to be used by entries that have + been relocated to an address too far away for a single branch. */ +extern int __elf_machine_runtime_setup (struct link_map *map, + int lazy, int profile); + +static inline int +elf_machine_runtime_setup (struct link_map *map, + int lazy, int profile) +{ + if (map->l_info[DT_JMPREL] == 0) + return lazy; + + if (map->l_info[DT_PPC(GOT)] == 0) + /* Handle old style PLT. */ + return __elf_machine_runtime_setup (map, lazy, profile); + + /* New style non-exec PLT consisting of an array of addresses. */ + map->l_info[DT_PPC(GOT)]->d_un.d_ptr += map->l_addr; + if (lazy) + { + Elf32_Addr *plt, *got, glink; + Elf32_Word num_plt_entries; + void (*dlrr) (void); + extern void _dl_runtime_resolve (void); + extern void _dl_prof_resolve (void); + + if (__glibc_likely (!profile)) + dlrr = _dl_runtime_resolve; + else + { + if (GLRO(dl_profile) != NULL + &&_dl_name_match_p (GLRO(dl_profile), map)) + GL(dl_profile_map) = map; + dlrr = _dl_prof_resolve; + } + got = (Elf32_Addr *) map->l_info[DT_PPC(GOT)]->d_un.d_ptr; + glink = got[1]; + got[1] = (Elf32_Addr) dlrr; + got[2] = (Elf32_Addr) map; + + /* Relocate everything in .plt by the load address offset. */ + plt = (Elf32_Addr *) D_PTR (map, l_info[DT_PLTGOT]); + num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof (Elf32_Rela)); + + /* If a library is prelinked but we have to relocate anyway, + we have to be able to undo the prelinking of .plt section. + The prelinker saved us at got[1] address of .glink + section's start. */ + if (glink) + { + glink += map->l_addr; + while (num_plt_entries-- != 0) + *plt++ = glink, glink += 4; + } + else + while (num_plt_entries-- != 0) + *plt++ += map->l_addr; + } + return lazy; +} + +/* Change the PLT entry whose reloc is 'reloc' to call the actual routine. */ +extern Elf32_Addr __elf_machine_fixup_plt (struct link_map *map, + Elf32_Addr *reloc_addr, + Elf32_Addr finaladdr); + +static inline Elf32_Addr +elf_machine_fixup_plt (struct link_map *map, lookup_t t, + const Elf32_Rela *reloc, + Elf32_Addr *reloc_addr, Elf64_Addr finaladdr) +{ + if (map->l_info[DT_PPC(GOT)] == 0) + /* Handle old style PLT. */ + return __elf_machine_fixup_plt (map, reloc_addr, finaladdr); + + *reloc_addr = finaladdr; + return finaladdr; +} + +/* Return the final value of a plt relocation. */ +static inline Elf32_Addr +elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + Elf32_Addr value) +{ + return value + reloc->r_addend; +} + + +/* Names of the architecture-specific auditing callback functions. */ +#define ARCH_LA_PLTENTER ppc32_gnu_pltenter +#define ARCH_LA_PLTEXIT ppc32_gnu_pltexit + +#endif /* dl_machine_h */ + +#ifdef RESOLVE_MAP + +/* Do the actual processing of a reloc, once its target address + has been determined. */ +extern void __process_machine_rela (struct link_map *map, + const Elf32_Rela *reloc, + struct link_map *sym_map, + const Elf32_Sym *sym, + const Elf32_Sym *refsym, + Elf32_Addr *const reloc_addr, + Elf32_Addr finaladdr, + int rinfo) attribute_hidden; + +/* Call _dl_signal_error when a resolved value overflows a relocated area. */ +extern void _dl_reloc_overflow (struct link_map *map, + const char *name, + Elf32_Addr *const reloc_addr, + const Elf32_Sym *refsym) attribute_hidden; + +/* Perform the relocation specified by RELOC and SYM (which is fully resolved). + LOADADDR is the load address of the object; INFO is an array indexed + by DT_* of the .dynamic section info. */ + +auto inline void __attribute__ ((always_inline)) +elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + const Elf32_Sym *sym, const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) +{ + Elf32_Addr *const reloc_addr = reloc_addr_arg; + const Elf32_Sym *const refsym = sym; + Elf32_Addr value; + const int r_type = ELF32_R_TYPE (reloc->r_info); + struct link_map *sym_map = NULL; + +#ifndef RESOLVE_CONFLICT_FIND_MAP + if (r_type == R_PPC_RELATIVE) + { + *reloc_addr = map->l_addr + reloc->r_addend; + return; + } + + if (__glibc_unlikely (r_type == R_PPC_NONE)) + return; + + /* binutils on ppc32 includes st_value in r_addend for relocations + against local symbols. */ + if (__builtin_expect (ELF32_ST_BIND (sym->st_info) == STB_LOCAL, 0) + && sym->st_shndx != SHN_UNDEF) + value = map->l_addr; + else + { + sym_map = RESOLVE_MAP (&sym, version, r_type); + value = sym_map == NULL ? 0 : sym_map->l_addr + sym->st_value; + } + value += reloc->r_addend; +#else + value = reloc->r_addend; +#endif + + if (sym != NULL + && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0) + && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1) + && __builtin_expect (!skip_ifunc, 1)) + value = elf_ifunc_invoke (value); + + /* A small amount of code is duplicated here for speed. In libc, + more than 90% of the relocs are R_PPC_RELATIVE; in the X11 shared + libraries, 60% are R_PPC_RELATIVE, 24% are R_PPC_GLOB_DAT or + R_PPC_ADDR32, and 16% are R_PPC_JMP_SLOT (which this routine + wouldn't usually handle). As an bonus, doing this here allows + the switch statement in __process_machine_rela to work. */ + switch (r_type) + { + case R_PPC_GLOB_DAT: + case R_PPC_ADDR32: + *reloc_addr = value; + break; + +#ifndef RESOLVE_CONFLICT_FIND_MAP +# ifdef RTLD_BOOTSTRAP +# define NOT_BOOTSTRAP 0 +# else +# define NOT_BOOTSTRAP 1 +# endif + + case R_PPC_DTPMOD32: + if (map->l_info[DT_PPC(OPT)] + && (map->l_info[DT_PPC(OPT)]->d_un.d_val & PPC_OPT_TLS)) + { + if (!NOT_BOOTSTRAP) + { + reloc_addr[0] = 0; + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + break; + } + else if (sym_map != NULL) + { +# ifndef SHARED + CHECK_STATIC_TLS (map, sym_map); +# else + if (TRY_STATIC_TLS (map, sym_map)) +# endif + { + reloc_addr[0] = 0; + /* Set up for local dynamic. */ + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + break; + } + } + } + if (!NOT_BOOTSTRAP) + /* During startup the dynamic linker is always index 1. */ + *reloc_addr = 1; + else if (sym_map != NULL) + /* Get the information from the link map returned by the + RESOLVE_MAP function. */ + *reloc_addr = sym_map->l_tls_modid; + break; + case R_PPC_DTPREL32: + if (map->l_info[DT_PPC(OPT)] + && (map->l_info[DT_PPC(OPT)]->d_un.d_val & PPC_OPT_TLS)) + { + if (!NOT_BOOTSTRAP) + { + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + break; + } + else if (sym_map != NULL) + { + /* This reloc is always preceded by R_PPC_DTPMOD32. */ +# ifndef SHARED + assert (HAVE_STATIC_TLS (map, sym_map)); +# else + if (HAVE_STATIC_TLS (map, sym_map)) +# endif + { + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + break; + } + } + } + /* During relocation all TLS symbols are defined and used. + Therefore the offset is already correct. */ + if (NOT_BOOTSTRAP && sym_map != NULL) + *reloc_addr = TLS_DTPREL_VALUE (sym, reloc); + break; + case R_PPC_TPREL32: + if (!NOT_BOOTSTRAP || sym_map != NULL) + { + if (NOT_BOOTSTRAP) + CHECK_STATIC_TLS (map, sym_map); + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + } + break; +#endif + + case R_PPC_JMP_SLOT: +#ifdef RESOLVE_CONFLICT_FIND_MAP + RESOLVE_CONFLICT_FIND_MAP (map, reloc_addr); +#endif + if (map->l_info[DT_PPC(GOT)] != 0) + { + *reloc_addr = value; + break; + } + /* FALLTHROUGH */ + + default: + __process_machine_rela (map, reloc, sym_map, sym, refsym, + reloc_addr, value, r_type); + } +} + +auto inline void __attribute__ ((always_inline)) +elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) +{ + Elf32_Addr *const reloc_addr = reloc_addr_arg; + *reloc_addr = l_addr + reloc->r_addend; +} + +auto inline void __attribute__ ((always_inline)) +elf_machine_lazy_rel (struct link_map *map, + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) +{ + /* elf_machine_runtime_setup handles this. */ +} + +#endif /* RESOLVE_MAP */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/dl-start.S b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-start.S new file mode 100644 index 0000000000..ab429567aa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-start.S @@ -0,0 +1,103 @@ +/* Machine-dependent ELF startup code. PowerPC version. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Initial entry point code for the dynamic linker. + The C function `_dl_start' is the real entry point; + its return value is the user program's entry point. */ +ENTRY(_start) +/* We start with the following on the stack, from top: + argc (4 bytes); + arguments for program (terminated by NULL); + environment variables (terminated by NULL); + arguments for the program loader. */ + +/* Call _dl_start with one parameter pointing at argc */ + mr r3,r1 +/* (we have to frob the stack pointer a bit to allow room for + _dl_start to save the link register). */ + li r4,0 + addi r1,r1,-16 + stw r4,0(r1) + bl _dl_start@local + + /* FALLTHRU */ +_dl_start_user: +/* Now, we do our main work of calling initialisation procedures. + The ELF ABI doesn't say anything about parameters for these, + so we just pass argc, argv, and the environment. + Changing these is strongly discouraged (not least because argc is + passed by value!). */ + +/* Put our GOT pointer in r31, */ + SETUP_GOT_ACCESS(r31,got_label) + addis r31,r31,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r31,r31,_GLOBAL_OFFSET_TABLE_-got_label@l +/* the address of _start in r30, */ + mr r30,r3 +/* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28. */ + lwz r28,_rtld_local@got(r31) + lwz r29,_dl_argc@got(r31) + lwz r27,__GI__dl_argv@got(r31) + +/* Call _dl_init (_dl_loaded, _dl_argc, _dl_argv, _dl_argv+_dl_argc+1). */ + lwz r3,0(r28) + lwz r4,0(r29) + lwz r5,0(r27) + slwi r6,r4,2 + add r6,r5,r6 + addi r6,r6,4 + bl _dl_init@local + +/* Now, to conform to the ELF ABI, we have to: */ +/* Pass argc (actually _dl_argc) in r3; */ + lwz r3,0(r29) +/* pass argv (actually _dl_argv) in r4; */ + lwz r4,0(r27) +/* pass envp (actually _dl_argv+_dl_argc+1) in r5; */ + slwi r5,r3,2 + add r6,r4,r5 + addi r5,r6,4 +/* pass the auxiliary vector in r6. This is passed to us just after _envp. */ +2: lwzu r0,4(r6) + cmpwi r0,0 + bne 2b + addi r6,r6,4 +/* Pass a termination function pointer (in this case _dl_fini) in r7. */ + lwz r7,_dl_fini@got(r31) +/* Now, call the start function in r30... */ + mtctr r30 +/* Pass the stack pointer in r1 (so far so good), pointing to a NULL value. + (This lets our startup code distinguish between a program linked statically, + which linux will call with argc on top of the stack which will hopefully + never be zero, and a dynamically linked program which will always have + a NULL on the top of the stack). + Take the opportunity to clear LR, so anyone who accidentally returns + from _start gets SEGV. Also clear the next few words of the stack. */ + +_dl_main_dispatch: + li r31,0 + stw r31,0(r1) + mtlr r31 + stw r31,4(r1) + stw r31,8(r1) + stw r31,12(r1) +/* Go do it! */ + bctr +END(_start) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/dl-trampoline.S b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-trampoline.S new file mode 100644 index 0000000000..16b12db0e4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-trampoline.S @@ -0,0 +1,189 @@ +/* PLT trampolines. PPC32 version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".text" + .align 2 + .globl _dl_runtime_resolve + .type _dl_runtime_resolve,@function +_dl_runtime_resolve: + cfi_startproc + # We need to save the registers used to pass parameters, and register 0, + # which is used by _mcount; the registers are saved in a stack frame. + stwu r1,-64(r1) + cfi_adjust_cfa_offset (64) + stw r0,12(r1) + stw r3,16(r1) + stw r4,20(r1) + # The code that calls this has put parameters for `fixup' in r12 and r11. + mr r3,r12 + stw r5,24(r1) + mr r4,r11 + stw r6,28(r1) + mflr r0 + # We also need to save some of the condition register fields + stw r7,32(r1) + # Don't clobber the caller's LRSAVE, it is needed by _mcount. + stw r0,48(r1) + cfi_offset (lr, -16) + stw r8,36(r1) + mfcr r0 + stw r9,40(r1) + stw r10,44(r1) + stw r0,8(r1) + bl _dl_fixup@local + # 'fixup' returns the address we want to branch to. + mtctr r3 + # Put the registers back... + lwz r0,48(r1) + lwz r10,44(r1) + lwz r9,40(r1) + mtlr r0 + lwz r8,36(r1) + lwz r0,8(r1) + lwz r7,32(r1) + lwz r6,28(r1) + mtcrf 0xFF,r0 + lwz r5,24(r1) + lwz r4,20(r1) + lwz r3,16(r1) + lwz r0,12(r1) + # ...unwind the stack frame, and jump to the PLT entry we updated. + addi r1,r1,64 + bctr + cfi_endproc + .size _dl_runtime_resolve,.-_dl_runtime_resolve + +#ifndef PROF + .align 2 + .globl _dl_prof_resolve + .type _dl_prof_resolve,@function +_dl_prof_resolve: + cfi_startproc + # We need to save the registers used to pass parameters, and register 0, + # which is used by _mcount; the registers are saved in a stack frame. + stwu r1,-320(r1) + cfi_adjust_cfa_offset (320) + /* Stack layout: + + +312 stackframe + +308 lr + +304 r1 + +288 v12 + +272 v11 + +256 v10 + +240 v9 + +224 v8 + +208 v7 + +192 v6 + +176 v5 + +160 v4 + +144 v3 + +128 v2 + +112 v1 + +104 fp8 + +96 fp7 + +88 fp6 + +80 fp5 + +72 fp4 + +64 fp3 + +56 fp2 + +48 fp1 + +44 r10 + +40 r9 + +36 r8 + +32 r7 + +28 r6 + +24 r5 + +20 r4 + +16 r3 + +12 r0 + +8 cr + r1 link + */ + stw r0,12(r1) + stw r3,16(r1) + stw r4,20(r1) + # The code that calls this has put parameters for `fixup' in r12 and r11. + mr r3,r12 + stw r5,24(r1) + mr r4,r11 + stw r6,28(r1) + mflr r5 + # We also need to save some of the condition register fields. + stw r7,32(r1) + # Don't clobber the caller's LRSAVE, it is needed by _mcount. + stw r5,308(r1) + cfi_offset (lr, -12) + stw r8,36(r1) + mfcr r0 + stw r9,40(r1) + stw r10,44(r1) + stw r0,8(r1) +#ifndef __NO_FPRS__ + # Save the floating point registers + stfd fp1,48(r1) + stfd fp2,56(r1) + stfd fp3,64(r1) + stfd fp4,72(r1) + stfd fp5,80(r1) + stfd fp6,88(r1) + stfd fp7,96(r1) + stfd fp8,104(r1) +#endif + # XXX TODO: store vmx registers + # Load the extra parameters. + addi r6,r1,16 + addi r7,r1,312 + li r0,-1 + stw r0,0(r7) + bl _dl_profile_fixup@local + # 'fixup' returns the address we want to branch to. + mtctr r3 + # Put the registers back... + lwz r0,308(r1) + lwz r10,44(r1) + lwz r9,40(r1) + mtlr r0 + lwz r8,36(r1) + lwz r0,8(r1) + lwz r7,32(r1) + lwz r6,28(r1) + mtcrf 0xFF,r0 + lwz r5,24(r1) + lwz r4,20(r1) + lwz r3,16(r1) + lwz r0,12(r1) +#ifndef __NO_FPRS__ + # Load the floating point registers. + lfd fp1,48(r1) + lfd fp2,56(r1) + lfd fp3,64(r1) + lfd fp4,72(r1) + lfd fp5,80(r1) + lfd fp6,88(r1) + lfd fp7,96(r1) + lfd fp8,104(r1) +#endif + # ...unwind the stack frame, and jump to the PLT entry we updated. + addi r1,r1,320 + bctr + cfi_endproc + .size _dl_prof_resolve,.-_dl_prof_resolve +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile new file mode 100644 index 0000000000..adf556870a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile @@ -0,0 +1,9 @@ +ifeq ($(subdir),math) +libm-routines += fexcepts_to_spe fexcepts_from_spe +libm-routines += fexcepts_to_prctl fexcepts_from_prctl +libm-routines += fe_note_change +endif + +ifeq ($(subdir),soft-fp) +sysdep_routines += fraiseexcept-soft +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feclearexcept.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feclearexcept.c new file mode 100644 index 0000000000..09132451a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feclearexcept.c @@ -0,0 +1,50 @@ +/* Clear floating-point exceptions for atomic compound assignment. + e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <stdlib.h> +#include <sysdep.h> +#include <sys/prctl.h> + +void +__atomic_feclearexcept (void) +{ + unsigned int fpescr, old_fpescr; + + /* Get the current state. */ + old_fpescr = fpescr = fegetenv_register (); + + /* Clear the relevant bits. */ + fpescr &= ~SPEFSCR_ALL_EXCEPT; + + /* Put the new state in effect. */ + fesetenv_register (fpescr); + + /* Let the kernel know if the "invalid" or "underflow" bit was + cleared. */ + if (old_fpescr & (SPEFSCR_FINVS | SPEFSCR_FUNFS)) + { + int pflags __attribute__ ((__unused__)), r; + INTERNAL_SYSCALL_DECL (err); + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + abort (); + } +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feholdexcept.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feholdexcept.c new file mode 100644 index 0000000000..3d6e10f1b6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feholdexcept.c @@ -0,0 +1,55 @@ +/* Store current floating-point environment and clear exceptions for + atomic compound assignment. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <stdlib.h> +#include <sysdep.h> +#include <sys/prctl.h> + +void +__atomic_feholdexcept (fenv_t *envp) +{ + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + /* Get the current state. */ + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + abort (); + + u.l[1] = fegetenv_register (); + *envp = u.fenv; + + /* Clear everything except for the rounding mode and trapping to the + kernel. */ + u.l[0] &= ~(PR_FP_EXC_DIV + | PR_FP_EXC_OVF + | PR_FP_EXC_UND + | PR_FP_EXC_RES + | PR_FP_EXC_INV); + u.l[1] &= SPEFSCR_FRMC | (SPEFSCR_ALL_EXCEPT_ENABLE & ~SPEFSCR_FINXE); + + /* Put the new state in effect. */ + fesetenv_register (u.l[1]); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + u.l[0] | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + abort (); +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feupdateenv.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feupdateenv.c new file mode 100644 index 0000000000..a4615a1b01 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feupdateenv.c @@ -0,0 +1,46 @@ +/* Install given floating-point environment and raise exceptions for + atomic compound assignment. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <stdlib.h> +#include <sysdep.h> +#include <sys/prctl.h> + +void +__atomic_feupdateenv (const fenv_t *envp) +{ + int exc; + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + /* Save the currently set exceptions. */ + exc = fegetenv_register () & SPEFSCR_ALL_EXCEPT; + + u.fenv = *envp; + + fesetenv_register (u.l[1]); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + u.l[0] | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + abort (); + + /* Raise (if appropriate) saved exceptions. */ + __feraiseexcept_soft (exc); +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c new file mode 100644 index 0000000000..cbf8d9df6c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c @@ -0,0 +1,53 @@ +/* Clear given exceptions in current floating-point environment. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +#undef feclearexcept +int +__feclearexcept (int excepts) +{ + unsigned int fpescr; + int excepts_spe = __fexcepts_to_spe (excepts); + + /* Get the current state. */ + fpescr = fegetenv_register (); + + /* Clear the relevant bits. */ + fpescr &= ~excepts_spe; + + /* Put the new state in effect. */ + fesetenv_register (fpescr); + + /* Let the kernel know if the "invalid" or "underflow" bit was + cleared. */ + if (excepts & (FE_INVALID | FE_UNDERFLOW)) + __fe_note_change (); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feclearexcept, __old_feclearexcept) +compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1); +#endif + +libm_hidden_ver (__feclearexcept, feclearexcept) +versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c new file mode 100644 index 0000000000..3dd3161f7d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c @@ -0,0 +1,39 @@ +/* Note a change to floating-point exceptions. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +/* Inform the kernel of a change to floating-point exceptions. */ + +void +__fe_note_change (void) +{ + int pflags, r; + INTERNAL_SYSCALL_DECL (err); + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return; + if ((pflags & PR_FP_EXC_SW_ENABLE) == 0) + INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + pflags | PR_FP_EXC_SW_ENABLE); +} + +libm_hidden_def (__fe_note_change) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c new file mode 100644 index 0000000000..94ce45463c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c @@ -0,0 +1,54 @@ +/* Disable floating-point exceptions. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +fedisableexcept (int excepts) +{ + int result = 0, pflags, r; + INTERNAL_SYSCALL_DECL (err); + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + /* Save old enable bits. */ + result = __fexcepts_from_prctl (pflags); + + pflags &= ~__fexcepts_to_prctl (excepts); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + pflags | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + /* If disabling signals for "inexact", also disable trapping to the + kernel. */ + if ((excepts & FE_INEXACT) != 0) + { + unsigned long fpescr; + + fpescr = fegetenv_register (); + fpescr &= ~SPEFSCR_FINXE; + fesetenv_register (fpescr); + } + + return result; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c new file mode 100644 index 0000000000..32116d1608 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c @@ -0,0 +1,54 @@ +/* Enable floating-point exceptions. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +feenableexcept (int excepts) +{ + unsigned int result = 0, pflags, r; + INTERNAL_SYSCALL_DECL (err); + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + /* Save old enable bits. */ + result = __fexcepts_from_prctl (pflags); + + pflags |= __fexcepts_to_prctl (excepts); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + pflags | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + /* If enabling signals for "inexact", also enable trapping to the + kernel. */ + if ((excepts & FE_INEXACT) != 0) + { + unsigned long fpescr; + + fpescr = fegetenv_register (); + fpescr |= SPEFSCR_FINXE; + fesetenv_register (fpescr); + } + + return result; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c new file mode 100644 index 0000000000..01b8fa4c9a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c @@ -0,0 +1,49 @@ +/* Store current floating-point environment. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +__fegetenv (fenv_t *envp) +{ + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + u.l[1] = fegetenv_register (); + *envp = u.fenv; + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetenv, __old_fegetenv) +compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1); +#endif +libm_hidden_def (__fegetenv) +libm_hidden_ver (__fegetenv, fegetenv) + +versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c new file mode 100644 index 0000000000..74fdb5a1c9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c @@ -0,0 +1,36 @@ +/* Get floating-point exceptions. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +fegetexcept (void) +{ + int result = 0, pflags, r; + INTERNAL_SYSCALL_DECL (err); + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + result = __fexcepts_from_prctl (pflags); + + return result; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetmode.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetmode.c new file mode 100644 index 0000000000..d262714266 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetmode.c @@ -0,0 +1,37 @@ +/* Store current floating-point control modes. e500 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +fegetmode (femode_t *modep) +{ + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + u.l[1] = fegetenv_register (); + *modep = u.fenv; + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c new file mode 100644 index 0000000000..afcc5d18cc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c @@ -0,0 +1,31 @@ +/* Return current rounding direction. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fegetround (void) +{ + unsigned long fpescr; + + fpescr = fegetenv_register (); + return fpescr & 3; +} +libm_hidden_def (__fegetround) +weak_alias (__fegetround, fegetround) +libm_hidden_weak (fegetround) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c new file mode 100644 index 0000000000..cba1239561 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c @@ -0,0 +1,59 @@ +/* Store current floating-point environment and clear exceptions. + e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +__feholdexcept (fenv_t *envp) +{ + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + /* Get the current state. */ + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + u.l[1] = fegetenv_register (); + *envp = u.fenv; + + /* Clear everything except for the rounding mode and trapping to the + kernel. */ + u.l[0] &= ~(PR_FP_EXC_DIV + | PR_FP_EXC_OVF + | PR_FP_EXC_UND + | PR_FP_EXC_RES + | PR_FP_EXC_INV); + u.l[1] &= SPEFSCR_FRMC | (SPEFSCR_ALL_EXCEPT_ENABLE & ~SPEFSCR_FINXE); + + /* Put the new state in effect. */ + fesetenv_register (u.l[1]); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + u.l[0] | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + return 0; +} +libm_hidden_def (__feholdexcept) +weak_alias (__feholdexcept, feholdexcept) +libm_hidden_weak (feholdexcept) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c new file mode 100644 index 0000000000..9fc3f53bc7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c @@ -0,0 +1,45 @@ +/* Constant floating-point environments for e500. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The use of "unsigned long long" as the type to define the + bit-pattern explicitly, rather than the type "double" used in + <bits/fenv.h>, means that we cannot include <fenv_libc.h> here to + get the enum constants for the SPEFSCR bits to enable + exceptions. */ + +#include <sys/prctl.h> + +/* If the default argument is used we use this value. */ +const unsigned long long __fe_dfl_env __attribute__ ((aligned (8))) = + 0x3cULL; + +/* The same representation is used for femode_t. */ +extern const unsigned long long __fe_dfl_mode + __attribute__ ((aligned (8), alias ("__fe_dfl_env"))); + +/* Floating-point environment where none of the exceptions are masked. */ +const unsigned long long __fe_enabled_env __attribute__ ((aligned (8))) = + (((unsigned long long) (PR_FP_EXC_DIV + | PR_FP_EXC_OVF + | PR_FP_EXC_UND + | PR_FP_EXC_RES + | PR_FP_EXC_INV)) << 32) | 0x7cULL; + +/* Non-IEEE mode. */ +const unsigned long long __fe_nonieee_env __attribute__ ((aligned (8))) = + 0x0ULL; diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h new file mode 100644 index 0000000000..13437f8052 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h @@ -0,0 +1,99 @@ +/* Internal libc stuff for floating point environment routines. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FENV_LIBC_H +#define _FENV_LIBC_H 1 + +#include <fenv.h> + +int __feraiseexcept_spe (int); +libm_hidden_proto (__feraiseexcept_spe) + +int __feraiseexcept_soft (int); +libc_hidden_proto (__feraiseexcept_soft) + +int __fexcepts_to_spe (int); +libm_hidden_proto (__fexcepts_to_spe) + +int __fexcepts_from_spe (int); +libm_hidden_proto (__fexcepts_from_spe) + +int __fexcepts_to_prctl (int); +libm_hidden_proto (__fexcepts_to_prctl) + +int __fexcepts_from_prctl (int); +libm_hidden_proto (__fexcepts_from_prctl) + +void __fe_note_change (void); +libm_hidden_proto (__fe_note_change) + +/* Equivalent to fegetenv, but returns an unsigned int instead of + taking a pointer. */ +#define fegetenv_register() \ + ({ unsigned int fscr; asm volatile ("mfspefscr %0" : "=r" (fscr)); fscr; }) + +/* Equivalent to fesetenv, but takes an unsigned int instead of a + pointer. */ +#define fesetenv_register(fscr) \ + ({ asm volatile ("mtspefscr %0" : : "r" (fscr)); }) + +typedef union +{ + fenv_t fenv; + unsigned int l[2]; +} fenv_union_t; + +/* Definitions of all the SPEFSCR bit numbers. */ +enum { + SPEFSCR_SOVH = 0x80000000, + SPEFSCR_OVH = 0x40000000, + SPEFSCR_FGH = 0x20000000, + SPEFSCR_FXH = 0x10000000, + SPEFSCR_FINVH = 0x08000000, + SPEFSCR_FDBZH = 0x04000000, + SPEFSCR_FUNFH = 0x02000000, + SPEFSCR_FOVFH = 0x01000000, + /* 2 unused bits. */ + SPEFSCR_FINXS = 0x00200000, + SPEFSCR_FINVS = 0x00100000, + SPEFSCR_FDBZS = 0x00080000, + SPEFSCR_FUNFS = 0x00040000, + SPEFSCR_FOVFS = 0x00020000, + /* Combination of the exception bits. */ + SPEFSCR_ALL_EXCEPT = 0x003e0000, + SPEFSCR_MODE = 0x00010000, + SPEFSCR_SOV = 0x00008000, + SPEFSCR_OV = 0x00004000, + SPEFSCR_FG = 0x00002000, + SPEFSCR_FX = 0x00001000, + SPEFSCR_FINV = 0x00000800, + SPEFSCR_FDBZ = 0x00000400, + SPEFSCR_FUNF = 0x00000200, + SPEFSCR_FOVF = 0x00000100, + /* 1 unused bit. */ + SPEFSCR_FINXE = 0x00000040, + SPEFSCR_FINVE = 0x00000020, + SPEFSCR_FDBZE = 0x00000010, + SPEFSCR_FUNFE = 0x00000008, + SPEFSCR_FOVFE = 0x00000004, + /* Combination of the exception trap enable bits. */ + SPEFSCR_ALL_EXCEPT_ENABLE = 0x0000007c, + SPEFSCR_FRMC = 0x00000003 +}; + +#endif /* fenv_libc.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c new file mode 100644 index 0000000000..185bcdb051 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c @@ -0,0 +1,50 @@ +/* Install given floating-point environment. e500 version. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +__fesetenv (const fenv_t *envp) +{ + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + u.fenv = *envp; + + fesetenv_register (u.l[1]); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + u.l[0] | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetenv, __old_fesetenv) +compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fesetenv) +libm_hidden_ver (__fesetenv, fesetenv) +versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetexcept.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetexcept.c new file mode 100644 index 0000000000..688583a6e7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetexcept.c @@ -0,0 +1,37 @@ +/* Set given exception flags. e500 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fesetexcept (int excepts) +{ + unsigned long old_spefscr, spefscr; + int excepts_spe = __fexcepts_to_spe (excepts); + + old_spefscr = fegetenv_register (); + spefscr = old_spefscr | excepts_spe; + fesetenv_register (spefscr); + + /* If the state of the "invalid" or "underflow" flag has changed, + inform the kernel. */ + if (((spefscr ^ old_spefscr) & (SPEFSCR_FINVS | SPEFSCR_FUNFS)) != 0) + __fe_note_change (); + + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetmode.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetmode.c new file mode 100644 index 0000000000..360e500b27 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetmode.c @@ -0,0 +1,43 @@ +/* Install given floating-point control modes. e500 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +#define SPEFSCR_STATUS 0xff3eff00 + +int +fesetmode (const femode_t *modep) +{ + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + u.fenv = *modep; + unsigned int spefscr = fegetenv_register (); + spefscr = (spefscr & SPEFSCR_STATUS) | (u.l[1] & ~SPEFSCR_STATUS); + + fesetenv_register (spefscr); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + u.l[0] | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c new file mode 100644 index 0000000000..15aaa62079 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c @@ -0,0 +1,37 @@ +/* Set current rounding direction. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fesetround (int round) +{ + unsigned long fpescr; + + if ((unsigned int) round > 3) + return 1; + + fpescr = fegetenv_register (); + fpescr = (fpescr & ~SPEFSCR_FRMC) | (round & 3); + fesetenv_register (fpescr); + + return 0; +} +libm_hidden_def (__fesetround) +weak_alias (__fesetround, fesetround) +libm_hidden_weak (fesetround) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fetestexceptflag.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fetestexceptflag.c new file mode 100644 index 0000000000..9d42d919ec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fetestexceptflag.c @@ -0,0 +1,25 @@ +/* Test exception in saved exception state. e500 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fetestexceptflag (const fexcept_t *flagp, int excepts) +{ + return __fexcepts_from_spe (*flagp) & excepts & FE_ALL_EXCEPT; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c new file mode 100644 index 0000000000..54de708449 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c @@ -0,0 +1,48 @@ +/* Install given floating-point environment and raise exceptions. + e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__feupdateenv (const fenv_t *envp) +{ + int exc; + + /* Save the currently set exceptions. */ + exc = fegetenv_register () & SPEFSCR_ALL_EXCEPT; + + /* Install new environment. */ + __fesetenv (envp); + + /* Raise (if appropriate) saved exceptions. */ + __feraiseexcept_spe (exc); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feupdateenv, __old_feupdateenv) +compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1); +#endif + +libm_hidden_def (__feupdateenv) +libm_hidden_ver (__feupdateenv, feupdateenv) +versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c new file mode 100644 index 0000000000..b260fc8df1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c @@ -0,0 +1,42 @@ +/* Convert floating-point exceptions from prctl form. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sys/prctl.h> + +/* Convert EXCEPTS from prctl bits to FE_* form, returning the + converted value. */ + +int +__fexcepts_from_prctl (int excepts) +{ + int result = 0; + if (excepts & PR_FP_EXC_OVF) + result |= FE_OVERFLOW; + if (excepts & PR_FP_EXC_UND) + result |= FE_UNDERFLOW; + if (excepts & PR_FP_EXC_INV) + result |= FE_INVALID; + if (excepts & PR_FP_EXC_DIV) + result |= FE_DIVBYZERO; + if (excepts & PR_FP_EXC_RES) + result |= FE_INEXACT; + return result; +} + +libm_hidden_def (__fexcepts_from_prctl) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c new file mode 100644 index 0000000000..a925fe4c37 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c @@ -0,0 +1,41 @@ +/* Convert floating-point exceptions from SPEFSCR form. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Convert EXCEPTS from SPEFSCR bits to FE_* form, returning the + converted value. */ + +int +__fexcepts_from_spe (int excepts) +{ + int result = 0; + if (excepts & SPEFSCR_FINXS) + result |= FE_INEXACT; + if (excepts & SPEFSCR_FDBZS) + result |= FE_DIVBYZERO; + if (excepts & SPEFSCR_FUNFS) + result |= FE_UNDERFLOW; + if (excepts & SPEFSCR_FOVFS) + result |= FE_OVERFLOW; + if (excepts & SPEFSCR_FINVS) + result |= FE_INVALID; + return result; +} + +libm_hidden_def (__fexcepts_from_spe) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c new file mode 100644 index 0000000000..e4626312ee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c @@ -0,0 +1,42 @@ +/* Convert floating-point exceptions to prctl form. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sys/prctl.h> + +/* Convert EXCEPTS from FE_* form to prctl bits, returning the + converted value. */ + +int +__fexcepts_to_prctl (int excepts) +{ + int result = 0; + if (excepts & FE_INEXACT) + result |= PR_FP_EXC_RES; + if (excepts & FE_DIVBYZERO) + result |= PR_FP_EXC_DIV; + if (excepts & FE_UNDERFLOW) + result |= PR_FP_EXC_UND; + if (excepts & FE_OVERFLOW) + result |= PR_FP_EXC_OVF; + if (excepts & FE_INVALID) + result |= PR_FP_EXC_INV; + return result; +} + +libm_hidden_def (__fexcepts_to_prctl) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c new file mode 100644 index 0000000000..3eed4ae6e9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c @@ -0,0 +1,41 @@ +/* Convert floating-point exceptions to SPEFSCR form. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Convert EXCEPTS from FE_* form to SPEFSCR bits, returning the + converted value. */ + +int +__fexcepts_to_spe (int excepts) +{ + int result = 0; + if (excepts & FE_INEXACT) + result |= SPEFSCR_FINXS; + if (excepts & FE_DIVBYZERO) + result |= SPEFSCR_FDBZS; + if (excepts & FE_UNDERFLOW) + result |= SPEFSCR_FUNFS; + if (excepts & FE_OVERFLOW) + result |= SPEFSCR_FOVFS; + if (excepts & FE_INVALID) + result |= SPEFSCR_FINVS; + return result; +} + +libm_hidden_def (__fexcepts_to_spe) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c new file mode 100644 index 0000000000..cff4330a9c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c @@ -0,0 +1,41 @@ +/* Store current representation for exceptions. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fegetexceptflag (fexcept_t *flagp, int excepts) +{ + unsigned long fpescr; + + /* Get the current state. */ + fpescr = fegetenv_register (); + + *flagp = fpescr & SPEFSCR_ALL_EXCEPT; + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetexceptflag, __old_fegetexceptflag) +compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/flt-rounds.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/flt-rounds.c new file mode 100644 index 0000000000..4fb8d034c4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/flt-rounds.c @@ -0,0 +1,39 @@ +/* Return current rounding mode as correct value for FLT_ROUNDS. e500 + version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <stdlib.h> + +int +__flt_rounds (void) +{ + switch (fegetenv_register () & SPEFSCR_FRMC) + { + case FE_TOWARDZERO: + return 0; + case FE_TONEAREST: + return 1; + case FE_UPWARD: + return 2; + case FE_DOWNWARD: + return 3; + default: + abort (); + } +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c new file mode 100644 index 0000000000..ef35a9426d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c @@ -0,0 +1,25 @@ +/* Raise given exceptions. e500 version for use from soft-fp. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2004. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <libc-symbols.h> + +#define __FERAISEEXCEPT_INTERNAL __feraiseexcept_soft +#include "spe-raise.c" +libc_hidden_def (__feraiseexcept_soft) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c new file mode 100644 index 0000000000..915642a1dc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c @@ -0,0 +1,41 @@ +/* Raise given exceptions. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +#define __FERAISEEXCEPT_INTERNAL __feraiseexcept_spe +#include "spe-raise.c" + +libm_hidden_def (__feraiseexcept_spe) + +#undef feraiseexcept +int +__feraiseexcept (int excepts) +{ + return __feraiseexcept_spe (__fexcepts_to_spe (excepts)); +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feraiseexcept, __old_feraiseexcept) +compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1); +#endif + +libm_hidden_def (__feraiseexcept) +libm_hidden_ver (__feraiseexcept, feraiseexcept) +versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c new file mode 100644 index 0000000000..f1e6a02681 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c @@ -0,0 +1,55 @@ +/* Set floating-point environment exception handling. e500 version. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fesetexceptflag (const fexcept_t *flagp, int excepts) +{ + unsigned long old_spefscr, spefscr; + fexcept_t flag; + int excepts_spe = __fexcepts_to_spe (excepts); + + /* Get the current state. */ + old_spefscr = fegetenv_register (); + + /* Ignore exceptions not listed in 'excepts'. */ + flag = *flagp & excepts_spe; + + /* Replace the exception status */ + spefscr = (old_spefscr & ~excepts_spe) | flag; + + /* Store the new status word (along with the rest of the environment). */ + fesetenv_register (spefscr); + + /* If the state of the "invalid" or "underflow" flag has changed, + inform the kernel. */ + if (((spefscr ^ old_spefscr) & (SPEFSCR_FINVS | SPEFSCR_FUNFS)) != 0) + __fe_note_change (); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetexceptflag, __old_fesetexceptflag) +compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c new file mode 100644 index 0000000000..05040d7224 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c @@ -0,0 +1,31 @@ +/* Test exception in current environment. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fetestexcept (int excepts) +{ + unsigned long f; + + /* Get the current state. */ + f = fegetenv_register (); + + return __fexcepts_from_spe (f) & excepts; +} +libm_hidden_def (fetestexcept) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h new file mode 100644 index 0000000000..117e7331e9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h @@ -0,0 +1,4 @@ +/* The generic version of get-rounding-mode.h using fpu_control.h, not + the one using the software rounding mode, is correct for e500. */ + +#include <sysdeps/generic/get-rounding-mode.h> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S new file mode 100644 index 0000000000..9d00b62923 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S @@ -0,0 +1,27 @@ +/* Floating-point absolute value. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__fabsf) +/* float [r3] fabsf (float [r3] x) ; */ + efsabs r3,r3 + blr +END (__fabsf) + +weak_alias (__fabsf, fabsf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c new file mode 100644 index 0000000000..cc13c67786 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c @@ -0,0 +1,53 @@ +/* Raise given exceptions, given the SPEFSCR bits for those exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__FERAISEEXCEPT_INTERNAL (int excepts) +{ + unsigned long f; + + f = fegetenv_register (); + f |= (excepts & SPEFSCR_ALL_EXCEPT); + fesetenv_register (f); + + /* Force the operations that cause the exceptions. */ + if ((SPEFSCR_FINVS & excepts) != 0) + /* 0 / 0 */ + asm volatile ("efsdiv %0,%0,%1" : : "r" (0), "r" (0)); + + if ((SPEFSCR_FDBZS & excepts) != 0) + /* 1.0 / 0.0 */ + asm volatile ("efsdiv %0,%0,%1" : : "r" (1.0F), "r" (0)); + + if ((SPEFSCR_FOVFS & excepts) != 0) + /* Largest normalized number plus itself. */ + asm volatile ("efsadd %0,%0,%1" : : "r" (0x7f7fffff), "r" (0x7f7fffff)); + + if ((SPEFSCR_FUNFS & excepts) != 0) + /* Smallest normalized number times itself. */ + asm volatile ("efsmul %0,%0,%1" : : "r" (0x800000), "r" (0x800000)); + + if ((SPEFSCR_FINXS & excepts) != 0) + /* Smallest normalized minus 1.0 raises the inexact flag. */ + asm volatile ("efssub %0,%0,%1" : : "r" (0x00800000), "r" (1.0F)); + + /* Success. */ + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/Makefile new file mode 100644 index 0000000000..e05073970d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/Makefile @@ -0,0 +1,3 @@ +ifeq ($(subdir),misc) +sysdep_routines += fprsave fprrest +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S new file mode 100644 index 0000000000..c01c94dfb7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S @@ -0,0 +1,178 @@ +/* longjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#ifdef __NO_VMX__ +# include <novmxsetjmp.h> +#else +# include <jmpbuf-offsets.h> +#endif + + .machine "altivec" +ENTRY (__longjmp_symbol) +#ifndef __NO_VMX__ +# ifdef PIC + mflr r6 + cfi_register (lr,r6) + SETUP_GOT_ACCESS(r5,got_label) + addis r5,r5,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r5,r5,_GLOBAL_OFFSET_TABLE_-got_label@l +# ifdef SHARED +# if IS_IN (rtld) + /* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ + lwz r5,_rtld_local_ro@got(r5) +# else + lwz r5,_rtld_global_ro@got(r5) +# endif + mtlr r6 + cfi_same_value (lr) + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5) +# else + lwz r5,_dl_hwcap@got(r5) + mtlr r6 + cfi_same_value (lr) + lwz r5,LOWORD(r5) +# endif +# else + lis r5,(_dl_hwcap+LOWORD)@ha + lwz r5,(_dl_hwcap+LOWORD)@l(r5) +# endif + andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(no_vmx) + la r5,((JB_VRS)*4)(3) + andi. r6,r5,0xf + lwz r0,((JB_VRSAVE)*4)(3) + mtspr VRSAVE,r0 + beq+ L(aligned_restore_vmx) + addi r6,r5,16 + lvsl v0,0,r5 + lvx v1,0,r5 + addi r5,r5,32 + lvx v21,0,r6 + vperm v20,v1,v21,v0 +# define load_misaligned_vmx_lo_loaded(loadvr,lovr,shiftvr,loadgpr,addgpr) \ + addi addgpr,addgpr,32; \ + lvx lovr,0,loadgpr; \ + vperm loadvr,loadvr,lovr,shiftvr; + load_misaligned_vmx_lo_loaded(v21,v22,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v22,v23,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v23,v24,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v24,v25,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v25,v26,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v26,v27,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v27,v28,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v28,v29,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v29,v30,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v30,v31,v0,r6,r5) + lvx v1,0,r5 + vperm v31,v31,v1,v0 + b L(no_vmx) +L(aligned_restore_vmx): + addi r6,r5,16 + lvx v20,0,r5 + addi r5,r5,32 + lvx v21,0,r6 + addi r6,r6,32 + lvx v22,0,r5 + addi r5,r5,32 + lvx v23,0,r6 + addi r6,r6,32 + lvx v24,0,r5 + addi r5,r5,32 + lvx v25,0,r6 + addi r6,r6,32 + lvx v26,0,r5 + addi r5,r5,32 + lvx v27,0,r6 + addi r6,r6,32 + lvx v28,0,r5 + addi r5,r5,32 + lvx v29,0,r6 + addi r6,r6,32 + lvx v30,0,r5 + lvx v31,0,r6 +L(no_vmx): +#endif +#if defined PTR_DEMANGLE || defined CHECK_SP + lwz r24,(JB_GPR1*4)(r3) +# ifdef CHECK_SP +# ifdef PTR_DEMANGLE + PTR_DEMANGLE3 (r24, r24, r25) +# endif + CHECK_SP (r24) + mr r1,r24 +# endif +#else + lwz r1,(JB_GPR1*4)(r3) +#endif + lwz r0,(JB_LR*4)(r3) + lwz r14,((JB_GPRS+0)*4)(r3) + lfd fp14,((JB_FPRS+0*2)*4)(r3) + lwz r15,((JB_GPRS+1)*4)(r3) + lfd fp15,((JB_FPRS+1*2)*4)(r3) + lwz r16,((JB_GPRS+2)*4)(r3) + lfd fp16,((JB_FPRS+2*2)*4)(r3) + lwz r17,((JB_GPRS+3)*4)(r3) + lfd fp17,((JB_FPRS+3*2)*4)(r3) + lwz r18,((JB_GPRS+4)*4)(r3) + lfd fp18,((JB_FPRS+4*2)*4)(r3) + lwz r19,((JB_GPRS+5)*4)(r3) + lfd fp19,((JB_FPRS+5*2)*4)(r3) + lwz r20,((JB_GPRS+6)*4)(r3) + lfd fp20,((JB_FPRS+6*2)*4)(r3) +#ifdef PTR_DEMANGLE +# ifndef CHECK_SP + PTR_DEMANGLE3 (r1, r24, r25) +# endif + PTR_DEMANGLE2 (r0, r25) +#endif + /* longjmp/longjmp_target probe expects longjmp first argument (4@3), + second argument (-4@4), and target address (4@0), respectively. */ + LIBC_PROBE (longjmp, 3, 4@3, -4@4, 4@0) + mtlr r0 + lwz r21,((JB_GPRS+7)*4)(r3) + lfd fp21,((JB_FPRS+7*2)*4)(r3) + lwz r22,((JB_GPRS+8)*4)(r3) + lfd fp22,((JB_FPRS+8*2)*4)(r3) + lwz r5,(JB_CR*4)(r3) + lwz r23,((JB_GPRS+9)*4)(r3) + lfd fp23,((JB_FPRS+9*2)*4)(r3) + lwz r24,((JB_GPRS+10)*4)(r3) + lfd fp24,((JB_FPRS+10*2)*4)(r3) + lwz r25,((JB_GPRS+11)*4)(r3) + lfd fp25,((JB_FPRS+11*2)*4)(r3) + mtcrf 0xFF,r5 + lwz r26,((JB_GPRS+12)*4)(r3) + lfd fp26,((JB_FPRS+12*2)*4)(r3) + lwz r27,((JB_GPRS+13)*4)(r3) + lfd fp27,((JB_FPRS+13*2)*4)(r3) + lwz r28,((JB_GPRS+14)*4)(r3) + lfd fp28,((JB_FPRS+14*2)*4)(r3) + lwz r29,((JB_GPRS+15)*4)(r3) + lfd fp29,((JB_FPRS+15*2)*4)(r3) + lwz r30,((JB_GPRS+16)*4)(r3) + lfd fp30,((JB_FPRS+16*2)*4)(r3) + lwz r31,((JB_GPRS+17)*4)(r3) + lfd fp31,((JB_FPRS+17*2)*4)(r3) + LIBC_PROBE (longjmp_target, 3, 4@3, -4@4, 4@0) + mr r3,r4 + blr +END (__longjmp_symbol) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/__longjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/__longjmp.S new file mode 100644 index 0000000000..0e62245927 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/__longjmp.S @@ -0,0 +1,40 @@ +/* AltiVec/VMX (new) version of __longjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libc-symbols.h> +#include <rtld-global-offsets.h> +#include <shlib-compat.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +# define __longjmp_symbol __longjmp +# include "__longjmp-common.S" + +#else /* IS_IN (libc) */ +strong_alias (__vmx__longjmp, __longjmp) +# define __longjmp_symbol __vmx__longjmp +# include "__longjmp-common.S" + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +# define __NO_VMX__ +# undef JB_SIZE +# undef __longjmp_symbol +# define __longjmp_symbol __novmx__longjmp +# include "__longjmp-common.S" +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/configure b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/configure new file mode 100644 index 0000000000..98c6f30ca3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/configure @@ -0,0 +1,56 @@ +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/powerpc/powerpc32/fpu. + +# Test whether integer to floating point conversions use fcfid. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fcfid use" >&5 +$as_echo_n "checking for fcfid use... " >&6; } +if ${libc_cv_ppc_fcfid+:} false; then : + $as_echo_n "(cached) " >&6 +else + echo 'double foo (int x) { return (double) x; }' > conftest.c +libc_cv_ppc_fcfid=no +if { ac_try='${CC-cc} -S $CFLAGS conftest.c -o conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + if grep '[ ]fcfid' conftest.s > /dev/null 2>&1; then + libc_cv_ppc_fcfid=yes + fi +fi +rm -rf conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc_fcfid" >&5 +$as_echo "$libc_cv_ppc_fcfid" >&6; } +if test $libc_cv_ppc_fcfid = yes; then + $as_echo "#define HAVE_PPC_FCFID 1" >>confdefs.h + +fi + +# Test whether floating point to long long conversions use fctidz. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fctidz use" >&5 +$as_echo_n "checking for fctidz use... " >&6; } +if ${libc_cv_ppc_fctidz+:} false; then : + $as_echo_n "(cached) " >&6 +else + echo 'long long int foo (double x) { return (long long int) x; }' > conftest.c +libc_cv_ppc_fctidz=no +if { ac_try='${CC-cc} -S $CFLAGS conftest.c -o conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + if grep '[ ]fctidz' conftest.s > /dev/null 2>&1; then + libc_cv_ppc_fctidz=yes + fi +fi +rm -rf conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc_fctidz" >&5 +$as_echo "$libc_cv_ppc_fctidz" >&6; } +if test $libc_cv_ppc_fctidz = yes; then + $as_echo "#define HAVE_PPC_FCTIDZ 1" >>confdefs.h + +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/configure.ac b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/configure.ac new file mode 100644 index 0000000000..1899705aab --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/configure.ac @@ -0,0 +1,34 @@ +GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. +# Local configure fragment for sysdeps/powerpc/powerpc32/fpu. + +# Test whether integer to floating point conversions use fcfid. +AC_CACHE_CHECK([for fcfid use], [libc_cv_ppc_fcfid], [dnl +echo 'double foo (int x) { return (double) x; }' > conftest.c +libc_cv_ppc_fcfid=no +if AC_TRY_COMMAND(${CC-cc} -S $CFLAGS conftest.c -o conftest.s 1>&AS_MESSAGE_LOG_FD); then +changequote(,)dnl + if grep '[ ]fcfid' conftest.s > /dev/null 2>&1; then + libc_cv_ppc_fcfid=yes + fi +changequote([,])dnl +fi +rm -rf conftest*]) +if test $libc_cv_ppc_fcfid = yes; then + AC_DEFINE([HAVE_PPC_FCFID]) +fi + +# Test whether floating point to long long conversions use fctidz. +AC_CACHE_CHECK([for fctidz use], [libc_cv_ppc_fctidz], [dnl +echo 'long long int foo (double x) { return (long long int) x; }' > conftest.c +libc_cv_ppc_fctidz=no +if AC_TRY_COMMAND(${CC-cc} -S $CFLAGS conftest.c -o conftest.s 1>&AS_MESSAGE_LOG_FD); then +changequote(,)dnl + if grep '[ ]fctidz' conftest.s > /dev/null 2>&1; then + libc_cv_ppc_fctidz=yes + fi +changequote([,])dnl +fi +rm -rf conftest*]) +if test $libc_cv_ppc_fctidz = yes; then + AC_DEFINE([HAVE_PPC_FCTIDZ]) +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fix-int-fp-convert-zero.h b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fix-int-fp-convert-zero.h new file mode 100644 index 0000000000..bd3eb62bf5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fix-int-fp-convert-zero.h @@ -0,0 +1,28 @@ +/* Fix for conversion of integer 0 to floating point. PowerPC version. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef FIX_INT_FP_CONVERT_ZERO_H +#define FIX_INT_FP_CONVERT_ZERO_H 1 + +/* The code sequences GCC generates for conversion of integers to + floating point result in -0 instead of +0 in FE_DOWNWARD mode when + the fcfid instruction is not used, as of GCC 5. See + <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67771>. */ +#define FIX_INT_FP_CONVERT_ZERO (!HAVE_PPC_FCFID) + +#endif /* fix-int-fp-convert-zero.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fprrest.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fprrest.S new file mode 100644 index 0000000000..7b2346471b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fprrest.S @@ -0,0 +1,94 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + Floating Point Registers (FPRs) restore routine +*/ + +#include <sysdep.h> + +ENTRY(_restfpr_all) + .globl C_TEXT(_restf14) + .globl C_TEXT(_restfpr_14) +C_TEXT(_restf14): +C_TEXT(_restfpr_14): lfd fp14,-144(r1) + .globl C_TEXT(_restf15) + .globl C_TEXT(_restfpr_15) +C_TEXT(_restf15): +C_TEXT(_restfpr_15): lfd fp15,-136(r1) + .globl C_TEXT(_restf16) + .globl C_TEXT(_restfpr_16) +C_TEXT(_restf16): +C_TEXT(_restfpr_16): lfd fp16,-128(r1) + .globl C_TEXT(_restf17) + .globl C_TEXT(_restfpr_17) +C_TEXT(_restf17): +C_TEXT(_restfpr_17): lfd fp17,-120(r1) + .globl C_TEXT(_restf18) + .globl C_TEXT(_restfpr_18) +C_TEXT(_restf18): +C_TEXT(_restfpr_18): lfd fp18,-112(r1) + .globl C_TEXT(_restf19) + .globl C_TEXT(_restfpr_19) +C_TEXT(_restf19): +C_TEXT(_restfpr_19): lfd fp19,-104(r1) + .globl C_TEXT(_restf20) + .globl C_TEXT(_restfpr_20) +C_TEXT(_restf20): +C_TEXT(_restfpr_20): lfd fp20,-96(r1) + .globl C_TEXT(_restf21) + .globl C_TEXT(_restfpr_21) +C_TEXT(_restf21): +C_TEXT(_restfpr_21): lfd fp21,-88(r1) + .globl C_TEXT(_restf22) + .globl C_TEXT(_restfpr_22) +C_TEXT(_restf22): +C_TEXT(_restfpr_22): lfd fp22,-80(r1) + .globl C_TEXT(_restf23) + .globl C_TEXT(_restfpr_23) +C_TEXT(_restf23): +C_TEXT(_restfpr_23): lfd fp23,-72(r1) + .globl C_TEXT(_restf24) + .globl C_TEXT(_restfpr_24) +C_TEXT(_restf24): +C_TEXT(_restfpr_24): lfd fp24,-64(r1) + .globl C_TEXT(_restf25) + .globl C_TEXT(_restfpr_25) +C_TEXT(_restf25): +C_TEXT(_restfpr_25): lfd fp25,-56(r1) + .globl C_TEXT(_restf26) + .globl C_TEXT(_restfpr_26) +C_TEXT(_restf26): +C_TEXT(_restfpr_26): lfd fp26,-48(r1) + .globl C_TEXT(_restf27) + .globl C_TEXT(_restfpr_27) +C_TEXT(_restf27): +C_TEXT(_restfpr_27): lfd fp27,-40(r1) + .globl C_TEXT(_restf28) + .globl C_TEXT(_restfpr_28) +C_TEXT(_restf28): +C_TEXT(_restfpr_28): lfd fp28,-32(r1) + .globl C_TEXT(_restf29) + .globl C_TEXT(_restfpr_29) +C_TEXT(_restf29): +C_TEXT(_restfpr_29): lwz r0,8(r1) #get return address from frame + lfd fp29,-24(r1) #restore f29 + mtlr r0 #move return address to LR + lfd fp30,-16(r1) #restore f30 + lfd fp31,-8(r1) #restore f31 + blr #return +END (_restfpr_all) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fprsave.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fprsave.S new file mode 100644 index 0000000000..975a8216cb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fprsave.S @@ -0,0 +1,111 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + Floating Point Registers (FPRs) save routine +*/ + +#include <sysdep.h> + +ENTRY(_savefpr_all) + .globl C_TEXT(_savef14) + .globl C_TEXT(_savefpr_14) +C_TEXT(_savef14): +C_TEXT(_savefpr_14): stfd fp14,-144(r1) + cfi_offset(fp14,-144) + .globl C_TEXT(_savef15) + .globl C_TEXT(_savefpr_15) +C_TEXT(_savef15): +C_TEXT(_savefpr_15): stfd fp15,-136(r1) + cfi_offset(fp15,-136) + .globl C_TEXT(_savef16) + .globl C_TEXT(_savefpr_16) +C_TEXT(_savef16): +C_TEXT(_savefpr_16): stfd fp16,-128(r1) + cfi_offset(fp16,-128) + .globl C_TEXT(_savef17) + .globl C_TEXT(_savefpr_17) +C_TEXT(_savef17): +C_TEXT(_savefpr_17): stfd fp17,-120(r1) + cfi_offset(fp17,-120) + .globl C_TEXT(_savef18) + .globl C_TEXT(_savefpr_18) +C_TEXT(_savef18): +C_TEXT(_savefpr_18): stfd fp18,-112(r1) + cfi_offset(fp18,-112) + .globl C_TEXT(_savef19) + .globl C_TEXT(_savefpr_19) +C_TEXT(_savef19): +C_TEXT(_savefpr_19): stfd fp19,-104(r1) + cfi_offset(fp19,-104) + .globl C_TEXT(_savef20) + .globl C_TEXT(_savefpr_20) +C_TEXT(_savef20): +C_TEXT(_savefpr_20): stfd fp20,-96(r1) + cfi_offset(fp20,-96) + .globl C_TEXT(_savef21) + .globl C_TEXT(_savefpr_21) +C_TEXT(_savef21): +C_TEXT(_savefpr_21): stfd fp21,-88(r1) + cfi_offset(fp21,-88) + .globl C_TEXT(_savef22) + .globl C_TEXT(_savefpr_22) +C_TEXT(_savef22): +C_TEXT(_savefpr_22): stfd fp22,-80(r1) + cfi_offset(fp22,-80) + .globl C_TEXT(_savef23) + .globl C_TEXT(_savefpr_23) +C_TEXT(_savef23): +C_TEXT(_savefpr_23): stfd fp23,-72(r1) + cfi_offset(fp23,-72) + .globl C_TEXT(_savef24) + .globl C_TEXT(_savefpr_24) +C_TEXT(_savef24): +C_TEXT(_savefpr_24): stfd fp24,-64(r1) + cfi_offset(fp24,-64) + .globl C_TEXT(_savef25) + .globl C_TEXT(_savefpr_25) +C_TEXT(_savef25): +C_TEXT(_savefpr_25): stfd fp25,-56(r1) + cfi_offset(fp25,-56) + .globl C_TEXT(_savef26) + .globl C_TEXT(_savefpr_26) +C_TEXT(_savef26): +C_TEXT(_savefpr_26): stfd fp26,-48(r1) + cfi_offset(fp26,-48) + .globl C_TEXT(_savef27) + .globl C_TEXT(_savefpr_27) +C_TEXT(_savef27): +C_TEXT(_savefpr_27): stfd fp27,-40(r1) + cfi_offset(fp27,-40) + .globl C_TEXT(_savef28) + .globl C_TEXT(_savefpr_28) +C_TEXT(_savef28): +C_TEXT(_savefpr_28): stfd fp28,-32(r1) + cfi_offset(fp28,-32) + .globl C_TEXT(_savef29) + .globl C_TEXT(_savefpr_29) +C_TEXT(_savef29): +C_TEXT(_savefpr_29): stfd fp29,-24(r1) #save f29 + stfd fp30,-16(r1) #save f30 + stfd fp31,-8(r1) #save f31 + cfi_offset(fp29,-24) + cfi_offset(fp30,-16) + cfi_offset(fp31,-8) + stw r0,8(r1) #save LR in callers frame + blr #return +END (_savefpr_all) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_ceil.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_ceil.S new file mode 100644 index 0000000000..51b8c21027 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_ceil.S @@ -0,0 +1,83 @@ +/* ceil function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**52 */ + .long 0x59800000 + + .section ".text" +ENTRY (__ceil) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,2 /* Set rounding mode toward +inf. */ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__ceil) + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +weak_alias (__ceil, ceill) +strong_alias (__ceil, __ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S new file mode 100644 index 0000000000..9d8d8aa294 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S @@ -0,0 +1,75 @@ +/* float ceil function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**23 */ + .long 0x4b000000 + + .section ".text" +ENTRY (__ceilf) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,2 /* Set rounding mode toward +inf. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__ceilf) + +weak_alias (__ceilf, ceilf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysign.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysign.S new file mode 100644 index 0000000000..850dded3b6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysign.S @@ -0,0 +1,59 @@ +/* Copy a sign bit between floating-point values. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__copysign) +/* double [f1] copysign (double [f1] x, double [f2] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + stfd fp2,8(r1) + lwz r3,8+HIWORD(r1) + cmpwi r3,0 + addi r1,r1,16 + cfi_adjust_cfa_offset (-16) + blt L(0) + fabs fp1,fp1 + blr +L(0): fnabs fp1,fp1 + blr + END (__copysign) + +weak_alias (__copysign,copysign) + +/* It turns out that it's safe to use this code even for single-precision. */ +weak_alias (__copysign,copysignf) +strong_alias(__copysign,__copysignf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__copysign,copysignl) +strong_alias(__copysign,__copysignl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __copysign, copysignl, GLIBC_2_0) +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __copysign, copysignl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysignf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysignf.S new file mode 100644 index 0000000000..e05438ae7d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysignf.S @@ -0,0 +1 @@ +/* __copysignf is in s_copysign.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S new file mode 100644 index 0000000000..272032b49e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S @@ -0,0 +1,66 @@ +/* Copy a sign bit between floating-point values. + IBM extended format long double version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__copysignl) +/* long double [f1,f2] copysign (long double [f1,f2] x, long double [f3,f4] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ +#ifdef _ARCH_PPCGR + /* fsel available. */ + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + stfd fp3,8(r1) + fmr fp0,fp1 + fabs fp1,fp1 + lwz r3,8+HIWORD(r1) + cmpwi cr6,r3,0 + addi r1,r1,16 + cfi_adjust_cfa_offset (-16) + fneg fp3,fp2 + fsel fp2,fp0,fp2,fp3 + bgelr cr6 + fneg fp1,fp1 + fneg fp2,fp2 + blr +#else + stwu r1,-32(r1) + cfi_adjust_cfa_offset (32) + stfd fp3,8(r1) + stfd fp1,16(r1) + lwz r3,8+HIWORD(r1) + lwz r4,16+HIWORD(r1) + xor r3,r3,r4 + cmpwi cr6,r3,0 + addi r1,r1,32 + cfi_adjust_cfa_offset (-32) + bgelr cr6 + fneg fp1,fp1 + fneg fp2,fp2 + blr +#endif +END (__copysignl) + +#if IS_IN (libm) +long_double_symbol (libm, __copysignl, copysignl) +#else +long_double_symbol (libc, __copysignl, copysignl) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fabs.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fabs.S new file mode 100644 index 0000000000..53d21301ee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fabs.S @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fabs.S> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __fabs, fabsl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fabsl.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fabsl.S new file mode 100644 index 0000000000..75608ec70c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fabsl.S @@ -0,0 +1,52 @@ +/* Copy a sign bit between floating-point values. + IBM extended format long double version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__fabsl) +/* long double [f1,f2] fabs (long double [f1,f2] x); + fabs(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ +#ifdef _ARCH_PPCGR + /* fsel available. */ + fmr fp0,fp1 +#else + /* Use integer operations to test sign of high part to avoid + exceptions on sNaNs. */ + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + stfd fp1,8(r1) +#endif + fabs fp1,fp1 +#ifdef _ARCH_PPCGR + fneg fp3,fp2 + fsel fp2,fp0,fp2,fp3 +#else + lwz r3,8+HIWORD(r1) + cmpwi cr6,r3,0 + addi r1,r1,16 + cfi_adjust_cfa_offset (-16) + bgelr cr6 + fneg fp2,fp2 +#endif + blr +END (__fabsl) + +long_double_symbol (libm, __fabsl, fabsl) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_floor.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_floor.S new file mode 100644 index 0000000000..90a1b184df --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_floor.S @@ -0,0 +1,83 @@ +/* Floor function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**52 */ + .long 0x59800000 + + .section ".text" +ENTRY (__floor) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,3 /* Set rounding mode toward -inf. */ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__floor) + +weak_alias (__floor, floor) + +#ifdef NO_LONG_DOUBLE +weak_alias (__floor, floorl) +strong_alias (__floor, __floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_floorf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_floorf.S new file mode 100644 index 0000000000..b87e3bf33e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_floorf.S @@ -0,0 +1,75 @@ +/* float Floor function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**23 */ + .long 0x4b000000 + + .section ".text" +ENTRY (__floorf) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,3 /* Set rounding mode toward -inf. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__floorf) + +weak_alias (__floorf, floorf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fma.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fma.S new file mode 100644 index 0000000000..d40695c633 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fma.S @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fma.S> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fma, fmal, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_isnan.S new file mode 100644 index 0000000000..363535dcdb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_isnan.S @@ -0,0 +1,57 @@ +/* isnan(). PowerPC32 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power4 +EALIGN (__isnan, 4, 0) + mffs fp0 + mtfsb0 4*cr6+lt /* reset_fpscr_bit (FPSCR_VE) */ + fcmpu cr7,fp1,fp1 + mtfsf 255,fp0 + li r3,0 + beqlr+ cr7 /* (x == x) then not a NAN */ + li r3,1 /* else must be a NAN */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +#ifndef __isnan +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) +#endif + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llrint.c b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llrint.c new file mode 100644 index 0000000000..13d150cd68 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llrint.c @@ -0,0 +1,63 @@ +/* Round a double value to a long long in the current rounding mode. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <limits.h> +#include <math.h> +#include <math_ldbl_opt.h> +#include <math_private.h> +#include <stdint.h> + +long long int +__llrint (double x) +{ + double rx = __rint (x); + if (HAVE_PPC_FCTIDZ || rx != x) + return (long long int) rx; + else + { + /* Avoid incorrect exceptions from libgcc conversions (as of GCC + 5): <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59412>. */ + if (fabs (rx) < 0x1p31) + return (long long int) (long int) rx; + uint64_t i0; + EXTRACT_WORDS64 (i0, rx); + int exponent = ((i0 >> 52) & 0x7ff) - 0x3ff; + if (exponent < 63) + { + unsigned long long int mant + = (i0 & ((1ULL << 52) - 1)) | (1ULL << 52); + if (exponent < 52) + mant >>= 52 - exponent; + else + mant <<= exponent - 52; + return (long long int) ((i0 & (1ULL << 63)) != 0 ? -mant : mant); + } + else if (rx == (double) LLONG_MIN) + return LLONG_MIN; + else + return (long long int) (long int) rx << 32; + } +} +weak_alias (__llrint, llrint) +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llrintf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llrintf.c new file mode 100644 index 0000000000..46365452a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llrintf.c @@ -0,0 +1,46 @@ +/* Round a float value to a long long in the current rounding mode. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <stdint.h> + +long long int +__llrintf (float x) +{ + float rx = __rintf (x); + if (HAVE_PPC_FCTIDZ || rx != x) + return (long long int) rx; + else + { + float arx = fabsf (rx); + /* Avoid incorrect exceptions from libgcc conversions (as of GCC + 5): <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59412>. */ + if (arx < 0x1p31f) + return (long long int) (long int) rx; + else if (!(arx < 0x1p55f)) + return (long long int) (long int) (rx * 0x1p-32f) << 32; + uint32_t i0; + GET_FLOAT_WORD (i0, rx); + int exponent = ((i0 >> 23) & 0xff) - 0x7f; + unsigned long long int mant = (i0 & 0x7fffff) | 0x800000; + mant <<= exponent - 23; + return (long long int) ((i0 & 0x80000000) != 0 ? -mant : mant); + } +} +weak_alias (__llrintf, llrintf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llround.c b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llround.c new file mode 100644 index 0000000000..5e5a237b0c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llround.c @@ -0,0 +1,90 @@ +/* Round double value to long long int. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <limits.h> +#include <math.h> +#include <math_ldbl_opt.h> +#include <math_private.h> +#include <stdint.h> + +/* Round to the nearest integer, with values exactly on a 0.5 boundary + rounded away from zero, regardless of the current rounding mode. + If (long long)x, when x is out of range of a long long, clips at + LLONG_MAX or LLONG_MIN, then this implementation also clips. */ + +long long int +__llround (double x) +{ + long long xr; + if (HAVE_PPC_FCTIDZ) + xr = (long long) x; + else + { + /* Avoid incorrect exceptions from libgcc conversions (as of GCC + 5): <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59412>. */ + if (fabs (x) < 0x1p31) + xr = (long long int) (long int) x; + else + { + uint64_t i0; + EXTRACT_WORDS64 (i0, x); + int exponent = ((i0 >> 52) & 0x7ff) - 0x3ff; + if (exponent < 63) + { + unsigned long long int mant + = (i0 & ((1ULL << 52) - 1)) | (1ULL << 52); + if (exponent < 52) + /* llround is not required to raise "inexact". */ + mant >>= 52 - exponent; + else + mant <<= exponent - 52; + xr = (long long int) ((i0 & (1ULL << 63)) != 0 ? -mant : mant); + } + else if (x == (double) LLONG_MIN) + xr = LLONG_MIN; + else + xr = (long long int) (long int) x << 32; + } + } + /* Avoid spurious "inexact" converting LLONG_MAX to double, and from + subtraction when the result is out of range, by returning early + for arguments large enough that no rounding is needed. */ + if (!(fabs (x) < 0x1p52)) + return xr; + double xrf = (double) xr; + + if (x >= 0.0) + { + if (x - xrf >= 0.5) + xr += (long long) ((unsigned long long) xr + 1) > 0; + } + else + { + if (xrf - x >= 0.5) + xr -= (long long) ((unsigned long long) xr - 1) < 0; + } + return xr; +} +weak_alias (__llround, llround) +#ifdef NO_LONG_DOUBLE +strong_alias (__llround, __llroundl) +weak_alias (__llround, llroundl) +#endif +#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llroundf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llroundf.c new file mode 100644 index 0000000000..55452bac73 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llroundf.c @@ -0,0 +1,72 @@ +/* Round float value to long long int. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <stdint.h> + +/* Round to the nearest integer, with values exactly on a 0.5 boundary + rounded away from zero, regardless of the current rounding mode. + If (long long)x, when x is out of range of a long long, clips at + LLONG_MAX or LLONG_MIN, then this implementation also clips. */ + +long long int +__llroundf (float x) +{ + long long xr; + if (HAVE_PPC_FCTIDZ) + xr = (long long) x; + else + { + float ax = fabsf (x); + /* Avoid incorrect exceptions from libgcc conversions (as of GCC + 5): <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59412>. */ + if (ax < 0x1p31f) + xr = (long long int) (long int) x; + else if (!(ax < 0x1p55f)) + xr = (long long int) (long int) (x * 0x1p-32f) << 32; + else + { + uint32_t i0; + GET_FLOAT_WORD (i0, x); + int exponent = ((i0 >> 23) & 0xff) - 0x7f; + unsigned long long int mant = (i0 & 0x7fffff) | 0x800000; + mant <<= exponent - 23; + xr = (long long int) ((i0 & 0x80000000) != 0 ? -mant : mant); + } + } + /* Avoid spurious "inexact" converting LLONG_MAX to float, and from + subtraction when the result is out of range, by returning early + for arguments large enough that no rounding is needed. */ + if (!(fabsf (x) < 0x1p23f)) + return xr; + float xrf = (float) xr; + + if (x >= 0.0) + { + if (x - xrf >= 0.5) + xr += (long long) ((unsigned long long) xr + 1) > 0; + } + else + { + if (xrf - x >= 0.5) + xr -= (long long) ((unsigned long long) xr - 1) < 0; + } + return xr; +} +weak_alias (__llroundf, llroundf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lrint.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lrint.S new file mode 100644 index 0000000000..8d54d95b6a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lrint.S @@ -0,0 +1,46 @@ +/* Round double to long int. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long int[r3] __lrint (double x[fp1]) */ +ENTRY (__lrint) + stwu r1,-16(r1) + fctiw fp13,fp1 + stfd fp13,8(r1) + nop /* Ensure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop + lwz r3,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__lrint) + +weak_alias (__lrint, lrint) + +strong_alias (__lrint, __lrintf) +weak_alias (__lrint, lrintf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lround.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lround.S new file mode 100644 index 0000000000..e4ec1bb0b6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lround.S @@ -0,0 +1,129 @@ +/* lround function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 0.5 */ + .long 0x3f000000 +.LC1: /* 2^52. */ + .long 0x59800000 + .section .rodata.cst8,"aM",@progbits,8 + .align 3 +.LC2: /* 0x7fffffff.8p0. */ + .long 0x41dfffff + .long 0xffe00000 +.LC3: /* -0x80000000.8p0. */ + .long 0xc1e00000 + .long 0x00100000 + .section ".text" + +/* long [r3] lround (float x [fp1]) + IEEE 1003.1 lround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "round to Nearest" mode. Instead we set + "round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. It is necessary to detect when x is + (+-)0x1.fffffffffffffp-2 because adding +-0.5 in this case will + cause an erroneous shift, carry and round. We simply return 0 if + 0.5 > x > -0.5. */ + +ENTRY (__lround) + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r10,r9,.LC0-got_label@ha + lfs fp10,.LC0-got_label@l(r10) + addis r10,r9,.LC1-got_label@ha + lfs fp11,.LC1-got_label@l(r10) + addis r10,r9,.LC2-got_label@ha + lfd fp9,.LC2-got_label@l(r10) + addis r10,r9,.LC3-got_label@ha + lfd fp8,.LC3-got_label@l(r10) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp10,.LC0@l(r9) + lis r9,.LC1@ha + lfs fp11,.LC1@l(r9) + lis r9,.LC2@ha + lfd fp9,.LC2@l(r9) + lis r9,.LC3@ha + lfd fp8,.LC3@l(r9) +#endif + fabs fp2, fp1 /* Get the absolute value of x. */ + fsub fp12,fp10,fp10 /* Compute 0.0. */ + fcmpu cr6, fp2, fp10 /* if |x| < 0.5 */ + fcmpu cr5, fp1, fp9 /* if x >= 0x7fffffff.8p0 */ + fcmpu cr1, fp1, fp8 /* if x <= -0x80000000.8p0 */ + fcmpu cr7, fp1, fp12 /* x is negative? x < 0.0 */ + blt- cr6,.Lretzero + bge- cr5,.Loflow + ble- cr1,.Loflow + /* Test whether an integer to avoid spurious "inexact". */ + fadd fp3,fp2,fp11 + fsub fp3,fp3,fp11 + fcmpu cr5, fp2, fp3 + beq cr5,.Lnobias + fadd fp3,fp2,fp10 /* |x|+=0.5 bias to prepare to round. */ + bge cr7,.Lconvert /* x is positive so don't negate x. */ + fnabs fp3,fp3 /* -(|x|+=0.5) */ +.Lconvert: + fctiwz fp4,fp3 /* Convert to Integer word lround toward 0. */ + stfd fp4,8(r1) + nop /* Ensure the following load is in a different dispatch */ + nop /* group to avoid pipe stall on POWER4&5. */ + nop + lwz r3,8+LOWORD(r1) /* Load return as integer. */ +.Lout: + addi r1,r1,16 + blr +.Lretzero: /* when 0.5 > x > -0.5 */ + li r3,0 /* return 0. */ + b .Lout +.Lnobias: + fmr fp3,fp1 + b .Lconvert +.Loflow: + fmr fp3,fp11 + bge cr7,.Lconvert + fnabs fp3,fp3 + b .Lconvert + END (__lround) + +weak_alias (__lround, lround) + +strong_alias (__lround, __lroundf) +weak_alias (__lround, lroundf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lroundf.S new file mode 100644 index 0000000000..6289e0be58 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lroundf.S @@ -0,0 +1 @@ +/* __lroundf is in s_lround.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S new file mode 100644 index 0000000000..df590e08bd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S @@ -0,0 +1,87 @@ +/* Round to int floating-point values. PowerPC32 version. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + +/* double [fp1] nearbyint(double [fp1] x) */ + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**52 */ + .long 0x59800000 /* TWO52: 2**52 */ + + .section ".text" +ENTRY (__nearbyint) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + bcl 20,31,1f +1: mflr r9 + addis r9,r9,.LC0-1b@ha + lfs fp13,.LC0-1b@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52 */ + bge cr7,.L10 + fcmpu cr7,fp1,fp12 /* if (x > 0.0 */ + ble cr7,L(lessthanzero) + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fadd fp1,fp1,fp13 /* x += TWO52 */ + fsub fp1,fp1,fp13 /* x -= TWO52 */ + fabs fp1,fp1 /* if (x == 0.0 */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr +L(lessthanzero): + bgelr cr7 + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fsub fp1,fp1,fp13 /* x -= TWO52 */ + fadd fp1,fp1,fp13 /* x += TWO52 */ + fnabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr +END (__nearbyint) + +weak_alias (__nearbyint, nearbyint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__nearbyint, nearbyintl) +strong_alias (__nearbyint, __nearbyintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S new file mode 100644 index 0000000000..fb4c6e4cee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S @@ -0,0 +1,78 @@ +/* Round to int floating-point values. PowerPC32 version. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> + + +/* float [fp1] nearbyintf(float [fp1] x) */ + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: + .long 0x4B000000 /* TWO23: 2**23 */ + + .section ".text" +ENTRY (__nearbyintf) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + bcl 20,31,1f +1: mflr r9 + addis r9,r9,.LC0-1b@ha + lfs fp13,.LC0-1b@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23 */ + bge cr7,.L10 + fcmpu cr7,fp1,fp12 /* if (x > 0.0 */ + ble cr7,L(lessthanzero) + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fadds fp1,fp1,fp13 /* x += TWO23 */ + fsubs fp1,fp1,fp13 /* x -= TWO23 */ + fabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr +L(lessthanzero): + bgelr cr7 + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fsubs fp1,fp1,fp13 /* x -= TWO23 */ + fadds fp1,fp1,fp13 /* x += TWO23 */ + fnabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr +END (__nearbyintf) + +weak_alias (__nearbyintf, nearbyintf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_rint.S new file mode 100644 index 0000000000..a1c3116447 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_rint.S @@ -0,0 +1,76 @@ +/* Round to int floating-point values. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**52 */ + .long 0x59800000 + + .section ".text" +ENTRY (__rint) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl cr7,.L10 + bng cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = 0.0; */ +.L4: + bnllr cr6 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__rint) + +weak_alias (__rint, rint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__rint, rintl) +strong_alias (__rint, __rintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __rint, rintl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_rintf.S new file mode 100644 index 0000000000..70e52e894d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_rintf.S @@ -0,0 +1,65 @@ +/* Round float to int floating-point values. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**23 */ + .long 0x4b000000 + + .section ".text" +ENTRY (__rintf) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl cr7,.L10 + bng cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = 0.0; */ +.L4: + bnllr cr6 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__rintf) + +weak_alias (__rintf, rintf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_round.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_round.S new file mode 100644 index 0000000000..f539890b17 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_round.S @@ -0,0 +1,104 @@ +/* round function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst8,"aM",@progbits,8 + .align 2 +.LC0: /* 2**52 */ + .long 0x59800000 +.LC1: /* 0.5 */ + .long 0x3f000000 + +/* double [fp1] round (double x [fp1]) + IEEE 1003.1 round function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "Round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "Round to Nearest" mode. Instead we set + "Round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. */ + + .section ".text" +ENTRY (__round) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + addi r9,r9,.LC0-got_label@l + mtlr r11 + cfi_same_value (lr) + lfs fp13,0(r9) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding mode toward 0. */ +#ifdef SHARED + lfs fp10,.LC1-.LC0(r9) +#else + lis r9,.LC1@ha + lfs fp10,.LC1@l(r9) +#endif + ble- cr6,.L4 + fadd fp1,fp1,fp10 /* x+= 0.5; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + fsub fp9,fp1,fp10 /* x+= 0.5; */ + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp9,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__round) + +weak_alias (__round, round) + +#ifdef NO_LONG_DOUBLE +weak_alias (__round, roundl) +strong_alias (__round, __roundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __round, roundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_roundf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_roundf.S new file mode 100644 index 0000000000..5daf84b598 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_roundf.S @@ -0,0 +1,95 @@ +/* roundf function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .rodata.cst8,"aM",@progbits,8 + .align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 +.LC1: /* 0.5 */ + .long 0x3f000000 + +/* float [fp1] roundf (float x [fp1]) + IEEE 1003.1 round function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "Round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "Round to Nearest" mode. Instead we set + "Round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. */ + + .section ".text" +ENTRY (__roundf ) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + addi r9,r9,.LC0-got_label@l + mtlr r11 + cfi_same_value (lr) + lfs fp13,0(r9) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding mode toward 0. */ +#ifdef SHARED + lfs fp10,.LC1-.LC0(r9) +#else + lfs fp10,.LC1@l(r9) +#endif + ble- cr6,.L4 + fadds fp1,fp1,fp10 /* x+= 0.5; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + fsubs fp9,fp1,fp10 /* x+= 0.5; */ + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp9,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__roundf) + +weak_alias (__roundf, roundf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_trunc.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_trunc.S new file mode 100644 index 0000000000..85d292c03c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_trunc.S @@ -0,0 +1,90 @@ +/* trunc function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**52 */ + .long 0x59800000 + +/* double [fp1] trunc (double x [fp1]) + IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer + value, in floating format, nearest to but no larger in magnitude + then the argument." + We set "round toward Zero" mode and trunc by adding +-2**52 then + subtracting +-2**52. */ + + .section ".text" +ENTRY (__trunc) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding toward 0 mode. */ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__trunc) + +weak_alias (__trunc, trunc) + +#ifdef NO_LONG_DOUBLE +weak_alias (__trunc, truncl) +strong_alias (__trunc, __truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_truncf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_truncf.S new file mode 100644 index 0000000000..9b91e3f0a5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_truncf.S @@ -0,0 +1,82 @@ +/* truncf function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**23 */ + .long 0x4b000000 + +/* float [fp1] truncf (float x [fp1]) + IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer + value, in floating format, nearest to but no larger in magnitude + then the argument." + We set "round toward Zero" mode and trunc by adding +-2**23 then + subtracting +-2**23. */ + + .section ".text" +ENTRY (__truncf) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding toward 0 mode. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__truncf) + +weak_alias (__truncf, truncf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S new file mode 100644 index 0000000000..19ad07ee56 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S @@ -0,0 +1,183 @@ +/* setjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#ifdef __NO_VMX__ +# include <novmxsetjmp.h> +#else +# include <jmpbuf-offsets.h> +#endif + + .machine "altivec" +ENTRY (__sigsetjmp_symbol) + +#ifdef PTR_MANGLE + mr r5,r1 + PTR_MANGLE(r5, r6) + stw r5,(JB_GPR1*4)(3) +#else + stw r1,(JB_GPR1*4)(3) +#endif + mflr r0 + /* setjmp probe expects longjmp first argument (4@3), second argument + (-4@4), and target address (4@0), respectively. */ + LIBC_PROBE (setjmp, 3, 4@3, -4@4, 4@0) + stw r14,((JB_GPRS+0)*4)(3) + stfd fp14,((JB_FPRS+0*2)*4)(3) +#ifdef PTR_MANGLE + PTR_MANGLE2 (r0, r6) +#endif + stw r0,(JB_LR*4)(3) + stw r15,((JB_GPRS+1)*4)(3) + stfd fp15,((JB_FPRS+1*2)*4)(3) + mfcr r0 + stw r16,((JB_GPRS+2)*4)(3) + stfd fp16,((JB_FPRS+2*2)*4)(3) + stw r0,(JB_CR*4)(3) + stw r17,((JB_GPRS+3)*4)(3) + stfd fp17,((JB_FPRS+3*2)*4)(3) + stw r18,((JB_GPRS+4)*4)(3) + stfd fp18,((JB_FPRS+4*2)*4)(3) + stw r19,((JB_GPRS+5)*4)(3) + stfd fp19,((JB_FPRS+5*2)*4)(3) + stw r20,((JB_GPRS+6)*4)(3) + stfd fp20,((JB_FPRS+6*2)*4)(3) + stw r21,((JB_GPRS+7)*4)(3) + stfd fp21,((JB_FPRS+7*2)*4)(3) + stw r22,((JB_GPRS+8)*4)(3) + stfd fp22,((JB_FPRS+8*2)*4)(3) + stw r23,((JB_GPRS+9)*4)(3) + stfd fp23,((JB_FPRS+9*2)*4)(3) + stw r24,((JB_GPRS+10)*4)(3) + stfd fp24,((JB_FPRS+10*2)*4)(3) + stw r25,((JB_GPRS+11)*4)(3) + stfd fp25,((JB_FPRS+11*2)*4)(3) + stw r26,((JB_GPRS+12)*4)(3) + stfd fp26,((JB_FPRS+12*2)*4)(3) + stw r27,((JB_GPRS+13)*4)(3) + stfd fp27,((JB_FPRS+13*2)*4)(3) + stw r28,((JB_GPRS+14)*4)(3) + stfd fp28,((JB_FPRS+14*2)*4)(3) + stw r29,((JB_GPRS+15)*4)(3) + stfd fp29,((JB_FPRS+15*2)*4)(3) + stw r30,((JB_GPRS+16)*4)(3) + stfd fp30,((JB_FPRS+16*2)*4)(3) + stw r31,((JB_GPRS+17)*4)(3) + stfd fp31,((JB_FPRS+17*2)*4)(3) +#ifndef __NO_VMX__ +# ifdef PIC + mflr r6 + cfi_register(lr,r6) + SETUP_GOT_ACCESS(r5,got_label) + addis r5,r5,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r5,r5,_GLOBAL_OFFSET_TABLE_-got_label@l + mtlr r6 + cfi_same_value (lr) +# ifdef SHARED +# if IS_IN (rtld) + /* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ + lwz r5,_rtld_local_ro@got(r5) +# else + lwz r5,_rtld_global_ro@got(r5) +# endif + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5) +# else + lwz r5,_dl_hwcap@got(r5) + lwz r5,LOWORD(r5) +# endif +# else + lis r6,(_dl_hwcap+LOWORD)@ha + lwz r5,(_dl_hwcap+LOWORD)@l(r6) +# endif + andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(no_vmx) + la r5,((JB_VRS)*4)(3) + andi. r6,r5,0xf + mfspr r0,VRSAVE + stw r0,((JB_VRSAVE)*4)(3) + addi r6,r5,16 + beq+ L(aligned_save_vmx) + + lvsr v0,0,r5 + lvsl v1,0,r5 + addi r6,r5,-16 + +# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \ + addi addgpr,addgpr,32; \ + vperm tmpvr,prevvr,savevr,shiftvr; \ + stvx tmpvr,0,savegpr + + /* + * We have to be careful not to corrupt the data below v20 and + * above v31. To keep things simple we just rotate both ends in + * the opposite direction to our main permute so we can use + * the common macro. + */ + + /* load and rotate data below v20 */ + lvx v2,0,r5 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v20,v2,v0,v3,r5,r6) + save_misaligned_vmx(v21,v20,v0,v3,r6,r5) + save_misaligned_vmx(v22,v21,v0,v3,r5,r6) + save_misaligned_vmx(v23,v22,v0,v3,r6,r5) + save_misaligned_vmx(v24,v23,v0,v3,r5,r6) + save_misaligned_vmx(v25,v24,v0,v3,r6,r5) + save_misaligned_vmx(v26,v25,v0,v3,r5,r6) + save_misaligned_vmx(v27,v26,v0,v3,r6,r5) + save_misaligned_vmx(v28,v27,v0,v3,r5,r6) + save_misaligned_vmx(v29,v28,v0,v3,r6,r5) + save_misaligned_vmx(v30,v29,v0,v3,r5,r6) + save_misaligned_vmx(v31,v30,v0,v3,r6,r5) + /* load and rotate data above v31 */ + lvx v2,0,r6 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v2,v31,v0,v3,r5,r6) + + b L(no_vmx) + +L(aligned_save_vmx): + stvx 20,0,r5 + addi r5,r5,32 + stvx 21,0,r6 + addi r6,r6,32 + stvx 22,0,r5 + addi r5,r5,32 + stvx 23,0,r6 + addi r6,r6,32 + stvx 24,0,r5 + addi r5,r5,32 + stvx 25,0,r6 + addi r6,r6,32 + stvx 26,0,r5 + addi r5,r5,32 + stvx 27,0,r6 + addi r6,r6,32 + stvx 28,0,r5 + addi r5,r5,32 + stvx 29,0,r6 + addi r6,r6,32 + stvx 30,0,r5 + stvx 31,0,r6 +L(no_vmx): +#endif + b __sigjmp_save_symbol@local +END (__sigsetjmp_symbol) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/setjmp.S new file mode 100644 index 0000000000..02b17d3467 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/setjmp.S @@ -0,0 +1,47 @@ +/* non altivec (old) version of setjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libc-symbols.h> +#include <rtld-global-offsets.h> +#include <shlib-compat.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +# define __sigsetjmp_symbol __sigsetjmp +# define __sigjmp_save_symbol __sigjmp_save +# include "setjmp-common.S" + +#else /* IS_IN (libc) */ +/* Build a versioned object for libc. */ +versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4) +# define __sigsetjmp_symbol __vmx__sigsetjmp +# define __sigjmp_save_symbol __vmx__sigjmp_save +# include "setjmp-common.S" +libc_hidden_ver (__vmx__sigsetjmp, __sigsetjmp) + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +# define __NO_VMX__ +# undef __sigsetjmp_symbol +# undef __sigjmp_save_symbol +# undef JB_SIZE +compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0) +# define __sigsetjmp_symbol __novmx__sigsetjmp +# define __sigjmp_save_symbol __novmx__sigjmp_save +# include "setjmp-common.S" +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/gprrest0.S b/REORG.TODO/sysdeps/powerpc/powerpc32/gprrest0.S new file mode 100644 index 0000000000..a379e0248e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/gprrest0.S @@ -0,0 +1,69 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + General Purpose Register (GPR) restore routine + when Floating Point Registers (FPRs) are not saved + + Note: This restore routine must not be called when GPR30 or + GPR31, or both, are the only registers beings saved. In these + cases, the saving and restoring must be done inline. +*/ + +#include <sysdep.h> + +ENTRY(_restgpr0_all) + .globl C_TEXT(_restgpr0_13) +C_TEXT(_restgpr0_13): lwz r13,-76(r1) + .globl C_TEXT(_restgpr0_14) +C_TEXT(_restgpr0_14): lwz r14,-72(r1) + .globl C_TEXT(_restgpr0_15) +C_TEXT(_restgpr0_15): lwz r15,-68(r1) + .globl C_TEXT(_restgpr0_16) +C_TEXT(_restgpr0_16): lwz r16,-64(r1) + .globl C_TEXT(_restgpr0_17) +C_TEXT(_restgpr0_17): lwz r17,-60(r1) + .globl C_TEXT(_restgpr0_18) +C_TEXT(_restgpr0_18): lwz r18,-56(r1) + .globl C_TEXT(_restgpr0_19) +C_TEXT(_restgpr0_19): lwz r19,-52(r1) + .globl C_TEXT(_restgpr0_20) +C_TEXT(_restgpr0_20): lwz r20,-48(r1) + .globl C_TEXT(_restgpr0_21) +C_TEXT(_restgpr0_21): lwz r21,-44(r1) + .globl C_TEXT(_restgpr0_22) +C_TEXT(_restgpr0_22): lwz r22,-40(r1) + .globl C_TEXT(_restgpr0_23) +C_TEXT(_restgpr0_23): lwz r23,-36(r1) + .globl C_TEXT(_restgpr0_24) +C_TEXT(_restgpr0_24): lwz r24,-32(r1) + .globl C_TEXT(_restgpr0_25) +C_TEXT(_restgpr0_25): lwz r25,-28(r1) + .globl C_TEXT(_restgpr0_26) +C_TEXT(_restgpr0_26): lwz r26,-24(r1) + .globl C_TEXT(_restgpr0_27) +C_TEXT(_restgpr0_27): lwz r27,-20(r1) + .globl C_TEXT(_restgpr0_28) +C_TEXT(_restgpr0_28): lwz r28,-16(r1) + .globl C_TEXT(_restgpr0_29) +C_TEXT(_restgpr0_29): lwz r0,8(r1) #get return address from frame + lwz r29,-12(r1) #restore r29 + mtlr r0 #move return address to LR + lwz r30,-8(r1) #restore r30 + lwz r31,-4(r1) #restore r31 + blr #return +END (_restgpr0_all) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/gprrest1.S b/REORG.TODO/sysdeps/powerpc/powerpc32/gprrest1.S new file mode 100644 index 0000000000..c3ac120ca5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/gprrest1.S @@ -0,0 +1,63 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + General Purpose Register (GPR) restore routine + when Floating Point Registers (FPRs) are saved +*/ + +#include <sysdep.h> + +ENTRY(_restgpr1_all) + .globl C_TEXT(_restgpr1_13) +C_TEXT(_restgpr1_13): lwz r13,-76(r12) + .globl C_TEXT(_restgpr1_14) +C_TEXT(_restgpr1_14): lwz r14,-72(r12) + .globl C_TEXT(_restgpr1_15) +C_TEXT(_restgpr1_15): lwz r15,-68(r12) + .globl C_TEXT(_restgpr1_16) +C_TEXT(_restgpr1_16): lwz r16,-64(r12) + .globl C_TEXT(_restgpr1_17) +C_TEXT(_restgpr1_17): lwz r17,-60(r12) + .globl C_TEXT(_restgpr1_18) +C_TEXT(_restgpr1_18): lwz r18,-56(r12) + .globl C_TEXT(_restgpr1_19) +C_TEXT(_restgpr1_19): lwz r19,-52(r12) + .globl C_TEXT(_restgpr1_20) +C_TEXT(_restgpr1_20): lwz r20,-48(r12) + .globl C_TEXT(_restgpr1_21) +C_TEXT(_restgpr1_21): lwz r21,-44(r12) + .globl C_TEXT(_restgpr1_22) +C_TEXT(_restgpr1_22): lwz r22,-40(r12) + .globl C_TEXT(_restgpr1_23) +C_TEXT(_restgpr1_23): lwz r23,-36(r12) + .globl C_TEXT(_restgpr1_24) +C_TEXT(_restgpr1_24): lwz r24,-32(r12) + .globl C_TEXT(_restgpr1_25) +C_TEXT(_restgpr1_25): lwz r25,-28(r12) + .globl C_TEXT(_restgpr1_26) +C_TEXT(_restgpr1_26): lwz r26,-24(r12) + .globl C_TEXT(_restgpr1_27) +C_TEXT(_restgpr1_27): lwz r27,-20(r12) + .globl C_TEXT(_restgpr1_28) +C_TEXT(_restgpr1_28): lwz r28,-16(r12) + .globl C_TEXT(_restgpr1_29) +C_TEXT(_restgpr1_29): lwz r29,-12(r12) #restore r29 + lwz r30,-8(r12) #restore r30 + lwz r31,-4(r12) #restore r31 + blr #return +END (_restgpr1_all) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/gprsave0.S b/REORG.TODO/sysdeps/powerpc/powerpc32/gprsave0.S new file mode 100644 index 0000000000..4c474514c6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/gprsave0.S @@ -0,0 +1,87 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + General Purpose Register (GPR) save routine + when Floating Point Registers (FPRs) are not saved + + Note: This save routine must not be called when GPR30 or + GPR31, or both, are the only registers beings saved. In these + cases, the saving and restoring must be done inline. +*/ + +#include <sysdep.h> + +ENTRY(_savegpr0_all) + .globl C_TEXT(_savegpr0_13) +C_TEXT(_savegpr0_13): stw r13,-76(r1) + cfi_offset(r13,-76) + .globl C_TEXT(_savegpr0_14) +C_TEXT(_savegpr0_14): stw r14,-72(r1) + cfi_offset(r14,-72) + .globl C_TEXT(_savegpr0_15) +C_TEXT(_savegpr0_15): stw r15,-68(r1) + cfi_offset(r15,-68) + .globl C_TEXT(_savegpr0_16) +C_TEXT(_savegpr0_16): stw r16,-64(r1) + cfi_offset(r16,-64) + .globl C_TEXT(_savegpr0_17) +C_TEXT(_savegpr0_17): stw r17,-60(r1) + cfi_offset(r17,-60) + .globl C_TEXT(_savegpr0_18) +C_TEXT(_savegpr0_18): stw r18,-56(r1) + cfi_offset(r18,-56) + .globl C_TEXT(_savegpr0_19) +C_TEXT(_savegpr0_19): stw r19,-52(r1) + cfi_offset(r19,-52) + .globl C_TEXT(_savegpr0_20) +C_TEXT(_savegpr0_20): stw r20,-48(r1) + cfi_offset(r20,-48) + .globl C_TEXT(_savegpr0_21) +C_TEXT(_savegpr0_21): stw r21,-44(r1) + cfi_offset(r21,-44) + .globl C_TEXT(_savegpr0_22) +C_TEXT(_savegpr0_22): stw r22,-40(r1) + cfi_offset(r22,-40) + .globl C_TEXT(_savegpr0_23) +C_TEXT(_savegpr0_23): stw r23,-36(r1) + cfi_offset(r23,-36) + .globl C_TEXT(_savegpr0_24) +C_TEXT(_savegpr0_24): stw r24,-32(r1) + cfi_offset(r24,-32) + .globl C_TEXT(_savegpr0_25) +C_TEXT(_savegpr0_25): stw r25,-28(r1) + cfi_offset(r25,-28) + .globl C_TEXT(_savegpr0_26) +C_TEXT(_savegpr0_26): stw r26,-24(r1) + cfi_offset(r26,-24) + .globl C_TEXT(_savegpr0_27) +C_TEXT(_savegpr0_27): stw r27,-20(r1) + cfi_offset(r27,-20) + .globl C_TEXT(_savegpr0_28) +C_TEXT(_savegpr0_28): stw r28,-16(r1) + cfi_offset(r28,-16) + .globl C_TEXT(_savegpr0_29) +C_TEXT(_savegpr0_29): stw r29,-12(r1) #save r29 + stw r30,-8(r1) #save r30 + stw r31,-4(r1) #save r31 + cfi_offset(r29,-12) + cfi_offset(r30,-8) + cfi_offset(r31,-4) + stw r0,8(r1) #save LR in callers frame + blr #return +END (_savegpr0_all) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/gprsave1.S b/REORG.TODO/sysdeps/powerpc/powerpc32/gprsave1.S new file mode 100644 index 0000000000..89da657070 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/gprsave1.S @@ -0,0 +1,63 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + General Purpose Register (GPR) save routine + when Floating Point Registers (FPRs) are saved +*/ + +#include <sysdep.h> + +ENTRY(_savegpr1_all) + .globl C_TEXT(_savegpr1_13) +C_TEXT(_savegpr1_13): stw r13,-76(r12) + .globl C_TEXT(_savegpr1_14) +C_TEXT(_savegpr1_14): stw r14,-72(r12) + .globl C_TEXT(_savegpr1_15) +C_TEXT(_savegpr1_15): stw r15,-68(r12) + .globl C_TEXT(_savegpr1_16) +C_TEXT(_savegpr1_16): stw r16,-64(r12) + .globl C_TEXT(_savegpr1_17) +C_TEXT(_savegpr1_17): stw r17,-60(r12) + .globl C_TEXT(_savegpr1_18) +C_TEXT(_savegpr1_18): stw r18,-56(r12) + .globl C_TEXT(_savegpr1_19) +C_TEXT(_savegpr1_19): stw r19,-52(r12) + .globl C_TEXT(_savegpr1_20) +C_TEXT(_savegpr1_20): stw r20,-48(r12) + .globl C_TEXT(_savegpr1_21) +C_TEXT(_savegpr1_21): stw r21,-44(r12) + .globl C_TEXT(_savegpr1_22) +C_TEXT(_savegpr1_22): stw r22,-40(r12) + .globl C_TEXT(_savegpr1_23) +C_TEXT(_savegpr1_23): stw r23,-36(r12) + .globl C_TEXT(_savegpr1_24) +C_TEXT(_savegpr1_24): stw r24,-32(r12) + .globl C_TEXT(_savegpr1_25) +C_TEXT(_savegpr1_25): stw r25,-28(r12) + .globl C_TEXT(_savegpr1_26) +C_TEXT(_savegpr1_26): stw r26,-24(r12) + .globl C_TEXT(_savegpr1_27) +C_TEXT(_savegpr1_27): stw r27,-20(r12) + .globl C_TEXT(_savegpr1_28) +C_TEXT(_savegpr1_28): stw r28,-16(r12) + .globl C_TEXT(_savegpr1_29) +C_TEXT(_savegpr1_29): stw r29,-12(r12) #save r29 + stw r30,-8(r12) #save r30 + stw r31,-4(r12) #save r31 + blr #return +END (_savegpr1_all) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/libgcc-compat.S b/REORG.TODO/sysdeps/powerpc/powerpc32/libgcc-compat.S new file mode 100644 index 0000000000..431ccaa4b3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/libgcc-compat.S @@ -0,0 +1,137 @@ +/* pre-.hidden libgcc compatibility + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + + .file "libgcc-compat.S" + +#include <shlib-compat.h> + +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_2_6) + + .symver __ashldi3_v_glibc20,__ashldi3@GLIBC_2.0 + .symver __ashrdi3_v_glibc20,__ashrdi3@GLIBC_2.0 + .symver __lshrdi3_v_glibc20,__lshrdi3@GLIBC_2.0 + .symver __cmpdi2_v_glibc20,__cmpdi2@GLIBC_2.0 + .symver __ucmpdi2_v_glibc20,__ucmpdi2@GLIBC_2.0 +#if !defined _SOFT_FLOAT && !defined __NO_FPRS__ + .symver __fixdfdi_v_glibc20,__fixdfdi@GLIBC_2.0 + .symver __fixunsdfdi_v_glibc20,__fixunsdfdi@GLIBC_2.0 + .symver __fixsfdi_v_glibc20,__fixsfdi@GLIBC_2.0 + .symver __fixunssfdi_v_glibc20,__fixunssfdi@GLIBC_2.0 + .symver __floatdidf_v_glibc20,__floatdidf@GLIBC_2.0 + .symver __floatdisf_v_glibc20,__floatdisf@GLIBC_2.0 +#endif + +#ifdef HAVE_DOT_HIDDEN + .hidden __ashldi3 + .hidden __ashrdi3 + .hidden __lshrdi3 + .hidden __cmpdi2 + .hidden __ucmpdi2 +# if !defined _SOFT_FLOAT && !defined __NO_FPRS__ + .hidden __fixdfdi + .hidden __fixsfdi + .hidden __fixunsdfdi + .hidden __fixunssfdi + .hidden __floatdidf + .hidden __floatdisf +# endif +#endif + + .section ".text" + + .align 2 + .globl __ashldi3_v_glibc20 + .type __ashldi3_v_glibc20,@function +__ashldi3_v_glibc20: + b __ashldi3@local +.Lfe5: + .size __ashldi3_v_glibc20,.Lfe5-__ashldi3_v_glibc20 + .align 2 + .globl __ashrdi3_v_glibc20 + .type __ashrdi3_v_glibc20,@function +__ashrdi3_v_glibc20: + b __ashrdi3@local +.Lfe6: + .size __ashrdi3_v_glibc20,.Lfe6-__ashrdi3_v_glibc20 + .align 2 + .globl __lshrdi3_v_glibc20 + .type __lshrdi3_v_glibc20,@function +__lshrdi3_v_glibc20: + b __lshrdi3@local +.Lfe7: + .size __lshrdi3_v_glibc20,.Lfe7-__lshrdi3_v_glibc20 + .align 2 + .globl __cmpdi2_v_glibc20 + .type __cmpdi2_v_glibc20,@function +__cmpdi2_v_glibc20: + b __cmpdi2@local +.Lfe8: + .size __cmpdi2_v_glibc20,.Lfe8-__cmpdi2_v_glibc20 + .align 2 + .globl __ucmpdi2_v_glibc20 + .type __ucmpdi2_v_glibc20,@function +__ucmpdi2_v_glibc20: + b __ucmpdi2@local +.Lfe9: + .size __ucmpdi2_v_glibc20,.Lfe9-__ucmpdi2_v_glibc20 +#if !defined _SOFT_FLOAT && !defined __NO_FPRS__ + .align 2 + .globl __fixdfdi_v_glibc20 + .type __fixdfdi_v_glibc20,@function +__fixdfdi_v_glibc20: + b __fixdfdi@local +.Lfe10: + .size __fixdfdi_v_glibc20,.Lfe10-__fixdfdi_v_glibc20 + .align 2 + .globl __fixunsdfdi_v_glibc20 + .type __fixunsdfdi_v_glibc20,@function +__fixunsdfdi_v_glibc20: + b __fixunsdfdi@local +.Lfe11: + .size __fixunsdfdi_v_glibc20,.Lfe11-__fixunsdfdi_v_glibc20 + .align 2 + .globl __fixsfdi_v_glibc20 + .type __fixsfdi_v_glibc20,@function +__fixsfdi_v_glibc20: + b __fixsfdi@local +.Lfe12: + .size __fixsfdi_v_glibc20,.Lfe12-__fixsfdi_v_glibc20 + .align 2 + .globl __fixunssfdi_v_glibc20 + .type __fixunssfdi_v_glibc20,@function +__fixunssfdi_v_glibc20: + b __fixunssfdi@local +.Lfe13: + .size __fixunssfdi_v_glibc20,.Lfe13-__fixunssfdi_v_glibc20 + .align 2 + .globl __floatdidf_v_glibc20 + .type __floatdidf_v_glibc20,@function +__floatdidf_v_glibc20: + b __floatdidf@local +.Lfe14: + .size __floatdidf_v_glibc20,.Lfe14-__floatdidf_v_glibc20 + .align 2 + .globl __floatdisf_v_glibc20 + .type __floatdisf_v_glibc20,@function +__floatdisf_v_glibc20: + b __floatdisf@local +.Lfe15: + .size __floatdisf_v_glibc20,.Lfe15-__floatdisf_v_glibc20 +#endif + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/lshift.S b/REORG.TODO/sysdeps/powerpc/powerpc32/lshift.S new file mode 100644 index 0000000000..78c151f101 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/lshift.S @@ -0,0 +1,125 @@ +/* Shift a limb left, low level routine. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t usize, + unsigned int cnt) */ + +EALIGN (__mpn_lshift, 3, 0) + + mtctr r5 # copy size into CTR + cmplwi cr0,r5,16 # is size < 16 + slwi r0,r5,2 + add r7,r3,r0 # make r7 point at end of res + add r4,r4,r0 # make r4 point at end of s1 + lwzu r11,-4(r4) # load first s1 limb + subfic r8,r6,32 + srw r3,r11,r8 # compute function return value + bge cr0,L(big) # branch if size >= 16 + + bdz L(end1) + +L(0): lwzu r10,-4(r4) + slw r9,r11,r6 + srw r12,r10,r8 + or r9,r9,r12 + stwu r9,-4(r7) + bdz L(end2) + lwzu r11,-4(r4) + slw r9,r10,r6 + srw r12,r11,r8 + or r9,r9,r12 + stwu r9,-4(r7) + bdnz L(0) + +L(end1):slw r0,r11,r6 + stw r0,-4(r7) + blr + + +/* Guaranteed not to succeed. */ +L(boom): tweq r0,r0 + +/* We imitate a case statement, by using (yuk!) fixed-length code chunks, + of size 4*12 bytes. We have to do this (or something) to make this PIC. */ +L(big): mflr r9 + cfi_register(lr,r9) + bltl- cr0,L(boom) # Never taken, only used to set LR. + slwi r10,r6,4 + mflr r12 + add r10,r12,r10 + slwi r8,r6,5 + add r10,r8,r10 + mtctr r10 + addi r5,r5,-1 + mtlr r9 + cfi_same_value (lr) + bctr + +L(end2):slw r0,r10,r6 + stw r0,-4(r7) + blr + +#define DO_LSHIFT(n) \ + mtctr r5; \ +L(n): lwzu r10,-4(r4); \ + slwi r9,r11,n; \ + inslwi r9,r10,n,32-n; \ + stwu r9,-4(r7); \ + bdz- L(end2); \ + lwzu r11,-4(r4); \ + slwi r9,r10,n; \ + inslwi r9,r11,n,32-n; \ + stwu r9,-4(r7); \ + bdnz L(n); \ + b L(end1) + + DO_LSHIFT(1) + DO_LSHIFT(2) + DO_LSHIFT(3) + DO_LSHIFT(4) + DO_LSHIFT(5) + DO_LSHIFT(6) + DO_LSHIFT(7) + DO_LSHIFT(8) + DO_LSHIFT(9) + DO_LSHIFT(10) + DO_LSHIFT(11) + DO_LSHIFT(12) + DO_LSHIFT(13) + DO_LSHIFT(14) + DO_LSHIFT(15) + DO_LSHIFT(16) + DO_LSHIFT(17) + DO_LSHIFT(18) + DO_LSHIFT(19) + DO_LSHIFT(20) + DO_LSHIFT(21) + DO_LSHIFT(22) + DO_LSHIFT(23) + DO_LSHIFT(24) + DO_LSHIFT(25) + DO_LSHIFT(26) + DO_LSHIFT(27) + DO_LSHIFT(28) + DO_LSHIFT(29) + DO_LSHIFT(30) + DO_LSHIFT(31) + +END (__mpn_lshift) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/mcount.c b/REORG.TODO/sysdeps/powerpc/powerpc32/mcount.c new file mode 100644 index 0000000000..d8c063222a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/mcount.c @@ -0,0 +1,17 @@ +#include <shlib-compat.h> + +#define __mcount_internal ___mcount_internal + +#include <gmon/mcount.c> + +#undef __mcount_internal + +/* __mcount_internal was added in glibc 2.15 with version GLIBC_PRIVATE, + but it should have been put in version GLIBC_2.15. Mark the + GLIBC_PRIVATE version obsolete and add it to GLIBC_2.16 instead. */ +versioned_symbol (libc, ___mcount_internal, __mcount_internal, GLIBC_2_16); + +#if SHLIB_COMPAT (libc, GLIBC_2_15, GLIBC_2_16) +strong_alias (___mcount_internal, ___mcount_internal_private); +symbol_version (___mcount_internal_private, __mcount_internal, GLIBC_PRIVATE); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/memset.S new file mode 100644 index 0000000000..8913a02698 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/memset.S @@ -0,0 +1,307 @@ +/* Optimized memset implementation for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. + + The memset is done in four sizes: byte (8 bits), word (32 bits), + 32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits). + There is a special case for setting whole cache lines to 0, which + takes advantage of the dcbz instruction. */ + + .section ".text" +EALIGN (memset, 5, 1) + +#define rTMP r0 +#define rRTN r3 /* initial value of 1st argument */ +#define rMEMP0 r3 /* original value of 1st arg */ +#define rCHR r4 /* char to set in each byte */ +#define rLEN r5 /* length of region to set */ +#define rMEMP r6 /* address at which we are storing */ +#define rALIGN r7 /* number of bytes we are setting now (when aligning) */ +#define rMEMP2 r8 + +#define rPOS32 r7 /* constant +32 for clearing with dcbz */ +#define rNEG64 r8 /* constant -64 for clearing with dcbz */ +#define rNEG32 r9 /* constant -32 for clearing with dcbz */ + +#define rGOT r9 /* Address of the Global Offset Table. */ +#define rCLS r8 /* Cache line size obtained from static. */ +#define rCLM r9 /* Cache line size mask to check for cache alignment. */ + +/* take care of case for size <= 4 */ + cmplwi cr1, rLEN, 4 + andi. rALIGN, rMEMP0, 3 + mr rMEMP, rMEMP0 + ble- cr1, L(small) +/* align to word boundary */ + cmplwi cr5, rLEN, 31 + rlwimi rCHR, rCHR, 8, 16, 23 + beq+ L(aligned) /* 8th instruction from .align */ + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 4 + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): sth rCHR, -2(rMEMP) /* 16th instruction from .align */ +/* take care of case for size < 31 */ +L(aligned): + mtcrf 0x01, rLEN + rlwimi rCHR, rCHR, 16, 0, 15 + ble cr5, L(medium) +/* align to cache line boundary... */ + andi. rALIGN, rMEMP, 0x1C + subfic rALIGN, rALIGN, 0x20 + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stw rCHR, -4(rMEMP2) + stwu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + stw rCHR, -4(rMEMP2) /* 32nd instruction from .align */ + stw rCHR, -8(rMEMP2) + stw rCHR, -12(rMEMP2) + stwu rCHR, -16(rMEMP2) +L(a2): bf 29, L(caligned) + stw rCHR, -4(rMEMP2) +/* now aligned to a cache line. */ +L(caligned): + cmplwi cr1, rCHR, 0 + clrrwi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN /* 40th instruction from .align */ + +/* Check if we can use the special case for clearing memory using dcbz. + This requires that we know the correct cache line size for this + processor. Getting the __cache_line_size may require establishing GOT + addressability, so branch out of line to set this up. */ + beq cr1, L(checklinesize) + +/* Store blocks of 32-bytes (256-bits) starting on a 32-byte boundary. + Can't assume that rCHR is zero or that the cache line size is either + 32-bytes or even known. */ +L(nondcbz): + srwi rTMP, rALIGN, 5 + mtctr rTMP + beq L(medium) /* we may not actually get to do a full line */ + clrlwi. rLEN, rLEN, 27 + add rMEMP, rMEMP, rALIGN + li rNEG64, -0x40 + bdz L(cloopdone) /* 48th instruction from .align */ + +/* We can't use dcbz here as we don't know the cache line size. We can + use "data cache block touch for store", which is safe. */ +L(c3): dcbtst rNEG64, rMEMP + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stw rCHR, -16(rMEMP) + nop /* let 601 fetch last 4 instructions of loop */ + stw rCHR, -20(rMEMP) + stw rCHR, -24(rMEMP) /* 56th instruction from .align */ + nop /* let 601 fetch first 8 instructions of loop */ + stw rCHR, -28(rMEMP) + stwu rCHR, -32(rMEMP) + bdnz L(c3) +L(cloopdone): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stw rCHR, -16(rMEMP) /* 64th instruction from .align */ + stw rCHR, -20(rMEMP) + cmplwi cr1, rLEN, 16 + stw rCHR, -24(rMEMP) + stw rCHR, -28(rMEMP) + stwu rCHR, -32(rMEMP) + beqlr + add rMEMP, rMEMP, rALIGN + b L(medium_tail2) /* 72nd instruction from .align */ + + .align 5 + nop +/* Clear cache lines of memory in 128-byte chunks. + This code is optimized for processors with 32-byte cache lines. + It is further optimized for the 601 processor, which requires + some care in how the code is aligned in the i-cache. */ +L(zloopstart): + clrlwi rLEN, rLEN, 27 + mtcrf 0x02, rALIGN + srwi. rTMP, rALIGN, 7 + mtctr rTMP + li rPOS32, 0x20 + li rNEG64, -0x40 + cmplwi cr1, rLEN, 16 /* 8 */ + bf 26, L(z0) + dcbz 0, rMEMP + addi rMEMP, rMEMP, 0x20 +L(z0): li rNEG32, -0x20 + bf 25, L(z1) + dcbz 0, rMEMP + dcbz rPOS32, rMEMP + addi rMEMP, rMEMP, 0x40 /* 16 */ +L(z1): cmplwi cr5, rLEN, 0 + beq L(medium) +L(zloop): + dcbz 0, rMEMP + dcbz rPOS32, rMEMP + addi rMEMP, rMEMP, 0x80 + dcbz rNEG64, rMEMP + dcbz rNEG32, rMEMP + bdnz L(zloop) + beqlr cr5 + b L(medium_tail2) + + .align 5 +L(small): +/* Memset of 4 bytes or less. */ + cmplwi cr5, rLEN, 1 + cmplwi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + nop + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + nop + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + cmplwi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt- 29, L(medium_29t) +L(medium_29f): + bge- cr1, L(medium_27t) + bflr- 28 + stw rCHR, -4(rMEMP) /* 8th instruction from .align */ + stw rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt- cr1, L(medium_27f) /* 16th instruction from .align */ +L(medium_27t): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stwu rCHR, -16(rMEMP) +L(medium_27f): + bflr- 28 +L(medium_28t): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + blr + +L(checklinesize): +#ifdef SHARED + mflr rTMP +/* If the remaining length is less the 32 bytes then don't bother getting + the cache line size. */ + beq L(medium) +/* Establishes GOT addressability so we can load __cache_line_size + from static. This value was set from the aux vector during startup. */ + SETUP_GOT_ACCESS(rGOT,got_label) + addis rGOT,rGOT,__cache_line_size-got_label@ha + lwz rCLS,__cache_line_size-got_label@l(rGOT) + mtlr rTMP +#else +/* Load __cache_line_size from static. This value was set from the + aux vector during startup. */ + lis rCLS,__cache_line_size@ha +/* If the remaining length is less the 32 bytes then don't bother getting + the cache line size. */ + beq L(medium) + lwz rCLS,__cache_line_size@l(rCLS) +#endif + +/* If the cache line size was not set then goto to L(nondcbz), which is + safe for any cache line size. */ + cmplwi cr1,rCLS,0 + beq cr1,L(nondcbz) + +/* If the cache line size is 32 bytes then goto to L(zloopstart), + which is coded specifically for 32-byte lines (and 601). */ + cmplwi cr1,rCLS,32 + beq cr1,L(zloopstart) + +/* Now we know the cache line size and it is not 32-bytes. However + we may not yet be aligned to the cache line and may have a partial + line to fill. Touch it 1st to fetch the cache line. */ + dcbtst 0,rMEMP + + addi rCLM,rCLS,-1 +L(getCacheAligned): + cmplwi cr1,rLEN,32 + and. rTMP,rCLM,rMEMP + blt cr1,L(handletail32) + beq L(cacheAligned) +/* We are not aligned to start of a cache line yet. Store 32-byte + of data and test again. */ + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + stw rCHR,-32(rMEMP) + stw rCHR,-28(rMEMP) + stw rCHR,-24(rMEMP) + stw rCHR,-20(rMEMP) + stw rCHR,-16(rMEMP) + stw rCHR,-12(rMEMP) + stw rCHR,-8(rMEMP) + stw rCHR,-4(rMEMP) + b L(getCacheAligned) + +/* Now we are aligned to the cache line and can use dcbz. */ +L(cacheAligned): + cmplw cr1,rLEN,rCLS + blt cr1,L(handletail32) + dcbz 0,rMEMP + subf rLEN,rCLS,rLEN + add rMEMP,rMEMP,rCLS + b L(cacheAligned) + +/* We are here because; the cache line size was set, it was not + 32-bytes, and the remainder (rLEN) is now less than the actual cache + line size. Set up the preconditions for L(nondcbz) and go there to + store the remaining bytes. */ +L(handletail32): + clrrwi. rALIGN, rLEN, 5 + b L(nondcbz) + +END (memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/mul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc32/mul_1.S new file mode 100644 index 0000000000..c3093e2bc6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/mul_1.S @@ -0,0 +1,45 @@ +/* Multiply a limb vector by a limb, for PowerPC. + Copyright (C) 1993-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, + mp_size_t s1_size, mp_limb_t s2_limb) + Calculate s1*s2 and put result in res_ptr; return carry. */ + +ENTRY (__mpn_mul_1) + mtctr r5 + + lwz r0,0(r4) + mullw r7,r0,r6 + mulhwu r10,r0,r6 + addi r3,r3,-4 # adjust res_ptr + addic r5,r5,0 # clear cy with dummy insn + bdz L(1) + +L(0): lwzu r0,4(r4) + stwu r7,4(r3) + mullw r8,r0,r6 + adde r7,r8,r10 + mulhwu r10,r0,r6 + bdnz L(0) + +L(1): stw r7,4(r3) + addze r3,r10 + blr +END (__mpn_mul_1) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/Implies new file mode 100644 index 0000000000..a372141bb7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/Implies @@ -0,0 +1,2 @@ +powerpc/power4/fpu +powerpc/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/Makefile new file mode 100644 index 0000000000..ba06adb5d0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/Makefile @@ -0,0 +1,6 @@ +# Makefile fragment for POWER4/5/5+. + +ifeq ($(subdir),string) +CFLAGS-wordcopy.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops +CFLAGS-memmove.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/Makefile new file mode 100644 index 0000000000..5afbade15f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/Makefile @@ -0,0 +1,43 @@ +ifeq ($(subdir),math) +sysdep_routines += s_isnan-power7 s_isnan-power6 s_isnan-power5 s_isnan-ppc32 \ + s_isnanf-power6 s_isnanf-power5 s_isinf-power7 \ + s_isinf-ppc32 s_isinff-ppc32 s_finite-power7 \ + s_finite-ppc32 s_finitef-ppc32 s_copysign-power6 \ + s_copysign-ppc32 s_modf-power5+ s_modf-ppc32 \ + s_modff-power5+ s_modff-ppc32 + +libm-sysdep_routines += s_llrintf-power6 s_llrintf-ppc32 s_llrint-power6 \ + s_llrint-ppc32 s_llround-power6 s_llround-power5+ \ + s_llround-ppc32 s_isnan-power7 \ + w_sqrt_compat-power5 w_sqrt_compat-ppc32 \ + w_sqrtf_compat-power5 w_sqrtf_compat-ppc32 \ + s_isnan-power6 s_isnan-power5 s_isnan-ppc32 \ + s_isnanf-power6 s_isnanf-power5 s_isinf-power7 \ + s_isinf-ppc32 s_isinff-ppc32 s_finite-power7 \ + s_finite-ppc32 s_finitef-ppc32 s_ceil-power5+ \ + s_ceil-ppc32 s_ceilf-power5+ s_ceilf-ppc32 \ + s_floor-power5+ s_floor-ppc32 s_floorf-power5+ \ + s_floorf-ppc32 s_round-power5+ s_round-ppc32 \ + s_roundf-power5+ s_roundf-ppc32 s_trunc-power5+ \ + s_trunc-ppc32 s_truncf-power5+ s_truncf-ppc32 \ + s_copysign-power6 s_copysign-ppc32 s_lround-power6x \ + s_lround-power5+ s_lround-ppc32 s_lrint-power6x \ + s_lrint-ppc32 s_modf-power5+ s_modf-ppc32 \ + s_modff-power5+ s_modff-ppc32 s_logbl-power7 \ + s_logbl-ppc32 s_logb-power7 s_logb-ppc32 \ + s_logbf-power7 s_logbf-ppc32 e_hypot-power7 \ + e_hypot-ppc32 e_hypotf-power7 e_hypotf-ppc32 + +CFLAGS-s_modf-power5+.c = -mcpu=power5+ +CFLAGS-s_modff-power5+.c = -mcpu=power5+ +CFLAGS-s_logbl-power7.c = -mcpu=power7 +CFLAGS-s_logb-power7.c = -mcpu=power7 +CFLAGS-s_logbf-power7.c = -mcpu=power7 +CFLAGS-e_hypot-power7.c = -mcpu=power7 +CFLAGS-e_hypotf-power7.c = -mcpu=power7 + +# These files quiet sNaNs in a way that is optimized away without +# -fsignaling-nans. +CFLAGS-s_modf-ppc32.c += -fsignaling-nans +CFLAGS-s_modff-ppc32.c += -fsignaling-nans +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-power7.c new file mode 100644 index 0000000000..d62b7c4f7b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-power7.c @@ -0,0 +1,26 @@ +/* __ieee_hypot() POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypot __ieee754_hypot_power7 + +#include <sysdeps/powerpc/fpu/e_hypot.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-ppc32.c new file mode 100644 index 0000000000..25984b724c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-ppc32.c @@ -0,0 +1,26 @@ +/* __ieee_hypot() PowerPC32 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypot __ieee754_hypot_ppc32 + +#include <sysdeps/powerpc/fpu/e_hypot.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot.c new file mode 100644 index 0000000000..5b8a06b4d1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot.c @@ -0,0 +1,32 @@ +/* Multiple versions of ieee754_hypot. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ieee754_hypot) __ieee754_hypot_ppc32 attribute_hidden; +extern __typeof (__ieee754_hypot) __ieee754_hypot_power7 attribute_hidden; + +libc_ifunc (__ieee754_hypot, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __ieee754_hypot_power7 + : __ieee754_hypot_ppc32); + +strong_alias (__ieee754_hypot, __hypot_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c new file mode 100644 index 0000000000..f52bc635d1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c @@ -0,0 +1,26 @@ +/* __ieee754_hypot POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypotf __ieee754_hypotf_power7 + +#include <sysdeps/powerpc/fpu/e_hypotf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-ppc32.c new file mode 100644 index 0000000000..d9f86163c9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-ppc32.c @@ -0,0 +1,26 @@ +/* __ieee_hypot() PowerPC32 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypotf __ieee754_hypotf_ppc32 + +#include <sysdeps/ieee754/flt-32/e_hypotf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf.c new file mode 100644 index 0000000000..d1fc0ce532 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf.c @@ -0,0 +1,32 @@ +/* Multiple versions of ieee754_hypotf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ieee754_hypotf) __ieee754_hypotf_ppc32 attribute_hidden; +extern __typeof (__ieee754_hypotf) __ieee754_hypotf_power7 attribute_hidden; + +libc_ifunc (__ieee754_hypotf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __ieee754_hypotf_power7 + : __ieee754_hypotf_ppc32); + +strong_alias (__ieee754_hypotf, __hypotf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil-power5+.S new file mode 100644 index 0000000000..670d6bbffb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil-power5+.S @@ -0,0 +1,33 @@ +/* ceil function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __ceil __ceil_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil-ppc32.S new file mode 100644 index 0000000000..77d43c5de7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil-ppc32.S @@ -0,0 +1,31 @@ +/* ceil function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __ceil __ceil_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_ceil.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil.c new file mode 100644 index 0000000000..4e3d980ce6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil.c @@ -0,0 +1,40 @@ +/* Multiple versions of ceil. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ceil) __ceil_ppc32 attribute_hidden; +extern __typeof (__ceil) __ceil_power5plus attribute_hidden; + +libc_ifunc (__ceil, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __ceil_power5plus + : __ceil_ppc32); + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +strong_alias (__ceil, __ceill) +weak_alias (__ceil, ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf-power5+.S new file mode 100644 index 0000000000..089261460e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf-power5+.S @@ -0,0 +1,26 @@ +/* ceilf function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(name, alias) + +#define __ceilf __ceilf_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf-ppc32.S new file mode 100644 index 0000000000..c783919f3a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf-ppc32.S @@ -0,0 +1,27 @@ +/* ceilf function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __ceilf __ceilf_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_ceilf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf.c new file mode 100644 index 0000000000..9674001caa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf.c @@ -0,0 +1,32 @@ +/* Multiple versions of ceilf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ceilf) __ceilf_ppc32 attribute_hidden; +extern __typeof (__ceilf) __ceilf_power5plus attribute_hidden; + +libc_ifunc (__ceilf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __ceilf_power5plus + : __ceilf_ppc32); + +weak_alias (__ceilf, ceilf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign-power6.S new file mode 100644 index 0000000000..1f58420763 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign-power6.S @@ -0,0 +1,33 @@ +/* copysign(). PowerPC32/POWER6 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __copysign __copysign_power6 + +#include <sysdeps/powerpc/powerpc32/power6/fpu/s_copysign.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign-ppc32.S new file mode 100644 index 0000000000..5d46f0379a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign-ppc32.S @@ -0,0 +1,34 @@ +/* copysign(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a, b, c, d) + +#define __copysign __copysign_ppc32 +#undef hidden_def +#define hidden_def(name) + strong_alias (__copysign_ppc32, __GI___copysign) + +#include <sysdeps/powerpc/powerpc32/fpu/s_copysign.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign.c new file mode 100644 index 0000000000..bddc1ab3c5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign.c @@ -0,0 +1,51 @@ +/* Multiple versions of copysign. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Redefine copysign so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias below. */ +#undef __copysign +#define __copysign __redirect_copysign +#include <math.h> +#include <math_ldbl_opt.h> +#undef __copysign +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__redirect_copysign) __copysign_ppc32 attribute_hidden; +extern __typeof (__redirect_copysign) __copysign_power6 attribute_hidden; + +extern __typeof (__redirect_copysign) __libm_copysign; +libc_ifunc (__libm_copysign, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __copysign_power6 + : __copysign_ppc32); + +strong_alias (__libm_copysign, __copysign) +weak_alias (__copysign, copysign) + +#ifdef NO_LONG_DOUBLE +weak_alias (__copysign,copysignl) +strong_alias(__copysign,__copysignl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __copysign, copysignl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __copysign, copysignl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysignf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysignf.c new file mode 100644 index 0000000000..7709e08968 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysignf.c @@ -0,0 +1,32 @@ +/* Multiple versions of copysignf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +/* It's safe to use double-precision implementation for single-precision. */ +extern __typeof (__copysignf) __copysign_ppc32 attribute_hidden; +extern __typeof (__copysignf) __copysign_power6 attribute_hidden; + +libc_ifunc (__copysignf, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __copysign_power6 + : __copysign_ppc32); + +weak_alias (__copysignf, copysignf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-power7.S new file mode 100644 index 0000000000..eb23f73e6e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-power7.S @@ -0,0 +1,33 @@ +/* finite(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __finite __finite_power7 + +#include <sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-ppc32.c new file mode 100644 index 0000000000..495dd36d0b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-ppc32.c @@ -0,0 +1,33 @@ +/* finite(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define FINITE __finite_ppc32 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__finite_ppc32, __GI___finite, __finite_ppc32); +#endif + +#include <sysdeps/ieee754/dbl-64/s_finite.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite.c new file mode 100644 index 0000000000..d3d0755bc0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite.c @@ -0,0 +1,57 @@ +/* Multiple versions of finite. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __finite __redirect___finite +#define __finitef __redirect___finitef +#define __finitel __redirect___finitel +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__finite) __finite_ppc32 attribute_hidden; +extern __typeof (__finite) __finite_power7 attribute_hidden; +#undef __finite +#undef __finitef +#undef __finitel + +libc_ifunc_redirected (__redirect___finite, __finite, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __finite_power7 + : __finite_ppc32); + +weak_alias (__finite, finite) + +#ifdef NO_LONG_DOUBLE +strong_alias (__finite, __finitel) +weak_alias (__finite, finitel) +#endif + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, finite, finitel, GLIBC_2_0); +# endif +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1) +compat_symbol (libm, __finite, __finitel, GLIBC_2_1); +# endif +#else +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __finite, __finitel, GLIBC_2_0); +compat_symbol (libc, finite, finitel, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef-ppc32.c new file mode 100644 index 0000000000..4a52949066 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef-ppc32.c @@ -0,0 +1,31 @@ +/* finitef(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define FINITEF __finitef_ppc32 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__finitef_ppc32, __GI___finitef, __finitef_ppc32); +#endif + +#include <sysdeps/ieee754/flt-32/s_finitef.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef.c new file mode 100644 index 0000000000..fa214f37ae --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef.c @@ -0,0 +1,34 @@ +/* Multiple versions of finitef. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __finitef __redirect___finitef +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__finitef) __finitef_ppc32 attribute_hidden; +/* The power7 finite(double) works for float. */ +extern __typeof (__finitef) __finite_power7 attribute_hidden; +#undef __finitef + +libc_ifunc_redirected (__redirect___finitef, __finitef, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __finite_power7 + : __finitef_ppc32); + +weak_alias (__finitef, finitef) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor-power5+.S new file mode 100644 index 0000000000..dfecd1c59e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor-power5+.S @@ -0,0 +1,33 @@ +/* floor function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __floor __floor_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor-ppc32.S new file mode 100644 index 0000000000..3af604ca25 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor-ppc32.S @@ -0,0 +1,31 @@ +/* floor function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __floor __floor_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_floor.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor.c new file mode 100644 index 0000000000..0da528c922 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor.c @@ -0,0 +1,40 @@ +/* Multiple versions of floor. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__floor) __floor_ppc32 attribute_hidden; +extern __typeof (__floor) __floor_power5plus attribute_hidden; + +libc_ifunc (__floor, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __floor_power5plus + : __floor_ppc32); + +weak_alias (__floor, floor) + +#ifdef NO_LONG_DOUBLE +strong_alias (__floor, __floorl) +weak_alias (__floor, floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf-power5+.S new file mode 100644 index 0000000000..fd2fa331bb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf-power5+.S @@ -0,0 +1,26 @@ +/* floorf function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(name, alias) + +#define __floorf __floorf_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf-ppc32.S new file mode 100644 index 0000000000..55bee8652b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf-ppc32.S @@ -0,0 +1,27 @@ +/* floorf function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __floorf __floorf_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_floorf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf.c new file mode 100644 index 0000000000..56375097f7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf.c @@ -0,0 +1,32 @@ +/* Multiple versions of floorf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__floorf) __floorf_ppc32 attribute_hidden; +extern __typeof (__floorf) __floorf_power5plus attribute_hidden; + +libc_ifunc (__floorf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __floorf_power5plus + : __floorf_ppc32); + +weak_alias (__floorf, floorf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf-power7.S new file mode 100644 index 0000000000..f7c7510649 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf-power7.S @@ -0,0 +1,33 @@ +/* isinf(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __isinf __isinf_power7 + +#include <sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf-ppc32.c new file mode 100644 index 0000000000..0d1cb75cf3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf-ppc32.c @@ -0,0 +1,33 @@ +/* isinf(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define __isinf __isinf_ppc32 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__isinf_ppc32, __GI___isinf, __isinf_ppc32); +#endif + +#include <sysdeps/ieee754/dbl-64/s_isinf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf.c new file mode 100644 index 0000000000..c7d7568ce0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf.c @@ -0,0 +1,50 @@ +/* Multiple versions of isinf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __isinf __redirect___isinf +#define __isinff __redirect___isinff +#define __isinfl __redirect___isinfl +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isinf) __isinf_ppc32 attribute_hidden; +extern __typeof (__isinf) __isinf_power7 attribute_hidden; +#undef __isinf +#undef __isinff +#undef __isinfl + +libc_ifunc_redirected (__redirect___isinf, __isinf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isinf_power7 + : __isinf_ppc32); + +weak_alias (__isinf, isinf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isinf, __isinfl) +weak_alias (__isinf, isinfl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0); +compat_symbol (libc, isinf, isinfl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinff-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinff-ppc32.c new file mode 100644 index 0000000000..25fd22d0c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinff-ppc32.c @@ -0,0 +1,31 @@ +/* isinff(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __isinff __isinff_ppc32 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__isinff_ppc32, __GI___isinff, __isinff_ppc32); +#endif + +#include <sysdeps/ieee754/flt-32/s_isinff.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinff.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinff.c new file mode 100644 index 0000000000..fd6e9983f6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinff.c @@ -0,0 +1,35 @@ +/* Multiple versions of isinf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __isinff __redirect___isinff +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isinff) __isinff_ppc32 attribute_hidden; +/* The power7 isinf(double) works for float. */ +extern __typeof (__isinff) __isinf_power7 attribute_hidden; +#undef __isinff + +libc_ifunc_redirected (__redirect___isinff, __isinff, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isinf_power7 + : __isinff_ppc32); + +weak_alias (__isinff, isinff) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power5.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power5.S new file mode 100644 index 0000000000..36d6709ab0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power5.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC32/POWER5 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power5 + +#include <sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power6.S new file mode 100644 index 0000000000..0ee970330c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power6.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC32/POWER6 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power6 + +#include <sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power7.S new file mode 100644 index 0000000000..24d5a21d73 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power7.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power7 + +#include <sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-ppc32.S new file mode 100644 index 0000000000..175229edd6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-ppc32.S @@ -0,0 +1,32 @@ +/* isnan(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef compat_symbol +#define compat_symbol(a, b, c, d) + +#define __isnan __isnan_ppc32 +#undef hidden_def +#define hidden_def(name) + strong_alias (__isnan_ppc32, __GI___isnan) + +#include <sysdeps/powerpc/powerpc32/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan.c new file mode 100644 index 0000000000..79447af535 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan.c @@ -0,0 +1,56 @@ +/* Multiple versions of isnan. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __isnan __redirect___isnan +#define __isnanf __redirect___isnanf +#define __isnanl __redirect___isnanl +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isnan) __isnan_ppc32 attribute_hidden; +extern __typeof (__isnan) __isnan_power5 attribute_hidden; +extern __typeof (__isnan) __isnan_power6 attribute_hidden; +extern __typeof (__isnan) __isnan_power7 attribute_hidden; +#undef __isnan +#undef __isnanf +#undef __isnanl + +libc_ifunc_redirected (__redirect___isnan, __isnan, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isnan_power7 + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __isnan_power6 + : (hwcap & PPC_FEATURE_POWER5) + ? __isnan_power5 + : __isnan_ppc32); + +weak_alias (__isnan, isnan) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power5.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power5.S new file mode 100644 index 0000000000..4e57289794 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power5.S @@ -0,0 +1,28 @@ +/* isnanf(). PowerPC32/POWER5 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#define __isnanf __isnanf_power5 + +#include <sysdeps/powerpc/powerpc32/power5/fpu/s_isnanf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power6.S new file mode 100644 index 0000000000..40687b5f43 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power6.S @@ -0,0 +1,28 @@ +/* isnanf(). PowerPC32/POWER6 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#define __isnanf __isnanf_power6 + +#include <sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf.c new file mode 100644 index 0000000000..12bdcffcec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf.c @@ -0,0 +1,39 @@ +/* Multiple versions of isnanf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +/* Both ppc32 and power7 isnan(double) work for float. */ +extern __typeof (__isnanf) __isnan_ppc32 attribute_hidden; +extern __typeof (__isnanf) __isnanf_power5 attribute_hidden; +extern __typeof (__isnanf) __isnanf_power6 attribute_hidden; +extern __typeof (__isnanf) __isnan_power7 attribute_hidden; + +libc_ifunc_hidden (__isnanf, __isnanf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isnan_power7 + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __isnanf_power6 + : (hwcap & PPC_FEATURE_POWER5) + ? __isnanf_power5 + : __isnan_ppc32); + +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint-power6.S new file mode 100644 index 0000000000..07c0f94a2f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint-power6.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC32/Power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_power6 + +#include <sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint-ppc32.S new file mode 100644 index 0000000000..390cd9a8bc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint-ppc32.S @@ -0,0 +1,31 @@ +/* llrint function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_ppc32 + +#include <sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint.c new file mode 100644 index 0000000000..88357ebdd9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint.c @@ -0,0 +1,40 @@ +/* Multiple versions of llrint. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__llrint) __llrint_ppc32 attribute_hidden; +extern __typeof (__llrint) __llrint_power6 attribute_hidden; + +libc_ifunc (__llrint, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __llrint_power6 + : __llrint_ppc32); + +weak_alias (__llrint, llrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-power6.S new file mode 100644 index 0000000000..8ebbefd3dd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-power6.S @@ -0,0 +1,26 @@ +/* Round float to long int. PowerPC32/POWER6 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __llrintf __llrintf_power6 + +#include <sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-ppc32.S new file mode 100644 index 0000000000..aa66e1ed9d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-ppc32.S @@ -0,0 +1,26 @@ +/* llrintf function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __llrintf __llrintf_ppc32 + +#include <sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf.c new file mode 100644 index 0000000000..f513e61944 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf.c @@ -0,0 +1,31 @@ +/* Multiple versions of llrintf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__llrintf) __llrintf_ppc32 attribute_hidden; +extern __typeof (__llrintf) __llrintf_power6 attribute_hidden; + +libc_ifunc (__llrintf, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __llrintf_power6 + : __llrintf_ppc32); + +weak_alias (__llrintf, llrintf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power5+.S new file mode 100644 index 0000000000..16e3124a3c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power5+.S @@ -0,0 +1,31 @@ +/* lround function. PowerPC32/POWER5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llround __llround_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power6.S new file mode 100644 index 0000000000..508c6b7a29 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power6.S @@ -0,0 +1,31 @@ +/* lround function. PowerPC32/POWER6 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llround __llround_power6 + +#include <sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-ppc32.S new file mode 100644 index 0000000000..4ecd2a266f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-ppc32.S @@ -0,0 +1,31 @@ +/* llround function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llround __llround_ppc32 + +#include <sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround.c new file mode 100644 index 0000000000..caf8953c81 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround.c @@ -0,0 +1,43 @@ +/* Multiple versions of llround. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__llround) __llround_ppc32 attribute_hidden; +extern __typeof (__llround) __llround_power5plus attribute_hidden; +extern __typeof (__llround) __llround_power6 attribute_hidden; + +libc_ifunc (__llround, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __llround_power6 : + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __llround_power5plus + : __llround_ppc32); + +weak_alias (__llround, llround) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llround, __llroundl) +weak_alias (__llround, llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llroundf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llroundf.c new file mode 100644 index 0000000000..1b7e45653a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llroundf.c @@ -0,0 +1,34 @@ +/* Multiple versions of llroundf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +/* It's safe to use double-precision implementation for single-precision. */ +extern __typeof (__llroundf) __llround_ppc32 attribute_hidden; +extern __typeof (__llroundf) __llround_power5plus attribute_hidden; +extern __typeof (__llroundf) __llround_power6 attribute_hidden; + +libc_ifunc (__llroundf, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __llround_power6 : + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __llround_power5plus + : __llround_ppc32); + +weak_alias (__llroundf, llroundf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-power7.c new file mode 100644 index 0000000000..20fd02a5ee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-power7.c @@ -0,0 +1,31 @@ +/* logb(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __logb __logb_power7 + +#include <sysdeps/powerpc/power7/fpu/s_logb.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-ppc32.c new file mode 100644 index 0000000000..3920579dbc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-ppc32.c @@ -0,0 +1,28 @@ +/* logb(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define __logb __logb_ppc32 + +#include <sysdeps/ieee754/dbl-64/s_logb.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb.c new file mode 100644 index 0000000000..fddd1ecbec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb.c @@ -0,0 +1,41 @@ +/* Multiple versions of logb. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logb) __logb_ppc32 attribute_hidden; +extern __typeof (__logb) __logb_power7 attribute_hidden; + +libc_ifunc (__logb, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logb_power7 + : __logb_ppc32); + +weak_alias (__logb, logb) + +#ifdef NO_LONG_DOUBLE +strong_alias (__logb, __logbl) +weak_alias (__logb, logbl) +#endif + +#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, logb, logbl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-power7.c new file mode 100644 index 0000000000..6e064bedbf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-power7.c @@ -0,0 +1,26 @@ +/* logbf(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __logbf __logbf_power7 + +#include <sysdeps/powerpc/power7/fpu/s_logbf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-ppc32.c new file mode 100644 index 0000000000..ca9865d784 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-ppc32.c @@ -0,0 +1,26 @@ +/* logbf(). PowerPC32 default implementation. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __logbf __logbf_ppc32 + +#include <sysdeps/ieee754/flt-32/s_logbf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf.c new file mode 100644 index 0000000000..3b9de174bd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf.c @@ -0,0 +1,32 @@ +/* Multiple versions of logbf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logbf) __logbf_ppc32 attribute_hidden; +extern __typeof (__logbf) __logbf_power7 attribute_hidden; + +libc_ifunc (__logbf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logbf_power7 + : __logbf_ppc32); + +weak_alias (__logbf, logbf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-power7.c new file mode 100644 index 0000000000..547664dd4b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-power7.c @@ -0,0 +1,21 @@ +/* logbl(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __logbl __logbl_power7 + +#include <sysdeps/powerpc/power7/fpu/s_logbl.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-ppc32.c new file mode 100644 index 0000000000..c4361226dd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-ppc32.c @@ -0,0 +1,21 @@ +/* logbl(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __logbl __logbl_ppc32 + +#include <sysdeps/ieee754/ldbl-128ibm/s_logbl.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl.c new file mode 100644 index 0000000000..167e9535cb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl.c @@ -0,0 +1,32 @@ +/* Multiple versions of logbl. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logbl) __logbl_ppc32 attribute_hidden; +extern __typeof (__logbl) __logbl_power7 attribute_hidden; + +libc_ifunc (__logbl, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logbl_power7 + : __logbl_ppc32); + +long_double_symbol (libm, __logbl, logbl); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint-power6x.S new file mode 100644 index 0000000000..3be812e5dc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint-power6x.S @@ -0,0 +1,33 @@ +/* Round double to long int. POWER6x PowerPC32 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __lrint __lrint_power6x + +#include <sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint-ppc32.S new file mode 100644 index 0000000000..ee5725db03 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint-ppc32.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __lrint __lrint_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_lrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint.c new file mode 100644 index 0000000000..ec7c991464 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint.c @@ -0,0 +1,40 @@ +/* Multiple versions of lrint. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__lrint) __lrint_ppc32 attribute_hidden; +extern __typeof (__lrint) __lrint_power6x attribute_hidden; + +libc_ifunc (__lrint, + (hwcap & PPC_FEATURE_POWER6_EXT) ? + __lrint_power6x + : __lrint_ppc32); + +weak_alias (__lrint, lrint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__lrint, lrintl) +strong_alias (__lrint, __lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrintf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrintf.c new file mode 100644 index 0000000000..4a7fa4bcfa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrintf.c @@ -0,0 +1,31 @@ +/* Multiple versions of lrintf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +/* It's safe to use double-precision implementation for single-precision. */ +extern __typeof (__lrintf) __lrint_ppc32 attribute_hidden; +extern __typeof (__lrintf) __lrint_power6x attribute_hidden; + +libc_ifunc (__lrintf, + (hwcap & PPC_FEATURE_POWER6_EXT) ? + __lrint_power6x + : __lrint_ppc32); + +weak_alias (__lrintf, lrintf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power5+.S new file mode 100644 index 0000000000..7aa2364183 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power5+.S @@ -0,0 +1,33 @@ +/* lround function. POWER5+, PowerPC32 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __lround __lround_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power6x.S new file mode 100644 index 0000000000..a9d54d560d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power6x.S @@ -0,0 +1,33 @@ +/* lround function. POWER6x, PowerPC32 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __lround __lround_power6x + +#include <sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-ppc32.S new file mode 100644 index 0000000000..78a931238a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-ppc32.S @@ -0,0 +1,31 @@ +/* lround function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __lround __lround_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_lround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround.c new file mode 100644 index 0000000000..fdc0c3dd8d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround.c @@ -0,0 +1,43 @@ +/* Multiple versions of lround. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__lround) __lround_ppc32 attribute_hidden; +extern __typeof (__lround) __lround_power5plus attribute_hidden; +extern __typeof (__lround) __lround_power6x attribute_hidden; + +libc_ifunc (__lround, + (hwcap & PPC_FEATURE_POWER6_EXT) ? + __lround_power6x + : (hwcap & PPC_FEATURE_POWER5_PLUS) ? + __lround_power5plus + : __lround_ppc32); + +weak_alias (__lround, lround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lroundf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lroundf.c new file mode 100644 index 0000000000..ff61dd6ca7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lroundf.c @@ -0,0 +1,34 @@ +/* Multiple versions of lroundf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +/* It's safe to use double-precision implementation for single-precision. */ +extern __typeof (__lroundf) __lround_ppc32 attribute_hidden; +extern __typeof (__lroundf) __lround_power5plus attribute_hidden; +extern __typeof (__lroundf) __lround_power6x attribute_hidden; + +libc_ifunc (__lroundf, + (hwcap & PPC_FEATURE_POWER6_EXT) ? + __lround_power6x + : (hwcap & PPC_FEATURE_POWER5_PLUS) ? + __lround_power5plus + : __lround_ppc32); + +weak_alias (__lroundf, lroundf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c new file mode 100644 index 0000000000..955b265045 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c @@ -0,0 +1,31 @@ +/* PowerPC/POWER5+ implementation for modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __modf __modf_power5plus + +#include <sysdeps/powerpc/power5+/fpu/s_modf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-ppc32.c new file mode 100644 index 0000000000..6561fdf8e1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-ppc32.c @@ -0,0 +1,29 @@ +/* PowerPC32 default implementation for modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) + +#define __modf __modf_ppc32 + +#include <sysdeps/ieee754/dbl-64/s_modf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf.c new file mode 100644 index 0000000000..537592ab16 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf.c @@ -0,0 +1,44 @@ +/* Multiple versions of modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__modf) __modf_ppc32 attribute_hidden; +extern __typeof (__modf) __modf_power5plus attribute_hidden; + +libc_ifunc (__modf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __modf_power5plus + : __modf_ppc32); + +weak_alias (__modf, modf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__modf, __modfl) +weak_alias (__modf, modfl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __modf, modfl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __modf, modfl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c new file mode 100644 index 0000000000..f5a12a282a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c @@ -0,0 +1,27 @@ +/* PowerPC/POWER5+ implementation for modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __modff __modff_power5plus + +#include <sysdeps/powerpc/power5+/fpu/s_modff.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-ppc32.c new file mode 100644 index 0000000000..9b9fa971bf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-ppc32.c @@ -0,0 +1,26 @@ +/* PowerPC32 default implementation for modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __modff __modff_ppc32 + +#include <sysdeps/ieee754/flt-32/s_modff.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff.c new file mode 100644 index 0000000000..7ae682d124 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff.c @@ -0,0 +1,30 @@ +/* Multiple versions of modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +extern __typeof (__modff) __modff_ppc32 attribute_hidden; +extern __typeof (__modff) __modff_power5plus attribute_hidden; + +libc_ifunc (__modff, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __modff_power5plus + : __modff_ppc32); + +weak_alias (__modff, modff) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round-power5+.S new file mode 100644 index 0000000000..02ab78b33c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round-power5+.S @@ -0,0 +1,33 @@ +/* round function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __round __round_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round-ppc32.S new file mode 100644 index 0000000000..b9e5bb6170 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round-ppc32.S @@ -0,0 +1,31 @@ +/* round function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __round __round_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_round.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round.c new file mode 100644 index 0000000000..46102862ac --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round.c @@ -0,0 +1,40 @@ +/* Multiple versions of round. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__round) __round_ppc32 attribute_hidden; +extern __typeof (__round) __round_power5plus attribute_hidden; + +libc_ifunc (__round, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __round_power5plus + : __round_ppc32); + +weak_alias (__round, round) + +#ifdef NO_LONG_DOUBLE +strong_alias (__round, __roundl) +weak_alias (__round, roundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __round, roundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf-power5+.S new file mode 100644 index 0000000000..442af4c1ea --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf-power5+.S @@ -0,0 +1,26 @@ +/* roundf function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(name, alias) + +#define __roundf __roundf_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf-ppc32.S new file mode 100644 index 0000000000..abe74e2e1a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf-ppc32.S @@ -0,0 +1,27 @@ +/* roundf function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __roundf __roundf_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_roundf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf.c new file mode 100644 index 0000000000..0a2e6d53cc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf.c @@ -0,0 +1,32 @@ +/* Multiple versions of roundf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__roundf) __roundf_ppc32 attribute_hidden; +extern __typeof (__roundf) __roundf_power5plus attribute_hidden; + +libc_ifunc (__roundf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __roundf_power5plus + : __roundf_ppc32); + +weak_alias (__roundf, roundf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc-power5+.S new file mode 100644 index 0000000000..129570ca34 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc-power5+.S @@ -0,0 +1,33 @@ +/* trunc function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __trunc __trunc_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc-ppc32.S new file mode 100644 index 0000000000..5e74248a9f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc-ppc32.S @@ -0,0 +1,31 @@ +/* trunc function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __trunc __trunc_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_trunc.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc.c new file mode 100644 index 0000000000..110e701218 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc.c @@ -0,0 +1,40 @@ +/* Multiple versions of trunc. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__trunc) __trunc_ppc32 attribute_hidden; +extern __typeof (__trunc) __trunc_power5plus attribute_hidden; + +libc_ifunc (__trunc, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __trunc_power5plus + : __trunc_ppc32); + +weak_alias (__trunc, trunc) + +#ifdef NO_LONG_DOUBLE +strong_alias (__trunc, __truncl) +weak_alias (__trunc, truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf-power5+.S new file mode 100644 index 0000000000..57ab878876 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf-power5+.S @@ -0,0 +1,26 @@ +/* truncf function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(name, alias) + +#define __truncf __truncf_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf-ppc32.S new file mode 100644 index 0000000000..4dd0a6021a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf-ppc32.S @@ -0,0 +1,27 @@ +/* truncf function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __truncf __truncf_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_truncf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf.c new file mode 100644 index 0000000000..ef6e97d000 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf.c @@ -0,0 +1,32 @@ +/* Multiple versions of truncf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__truncf) __truncf_ppc32 attribute_hidden; +extern __typeof (__truncf) __truncf_power5plus attribute_hidden; + +libc_ifunc (__truncf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __truncf_power5plus + : __truncf_ppc32); + +weak_alias (__truncf, truncf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-power5.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-power5.S new file mode 100644 index 0000000000..7c5a504177 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-power5.S @@ -0,0 +1,31 @@ +/* sqrt function. PowerPC32/POWER5 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __sqrt __sqrt_power5 + +#include <sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt_compat.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-ppc32.S new file mode 100644 index 0000000000..534e934ac9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-ppc32.S @@ -0,0 +1,31 @@ +/* sqrt function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __sqrt __sqrt_ppc32 + +#include <sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt_compat.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat.c new file mode 100644 index 0000000000..1e1892034e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat.c @@ -0,0 +1,40 @@ +/* Multiple versions of sqrt. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__sqrt) __sqrt_ppc32 attribute_hidden; +extern __typeof (__sqrt) __sqrt_power5 attribute_hidden; + +libc_ifunc (__sqrt, + (hwcap & PPC_FEATURE_POWER5) + ? __sqrt_power5 + : __sqrt_ppc32); + +weak_alias (__sqrt, sqrt) + +#ifdef NO_LONG_DOUBLE +strong_alias (__sqrt, __sqrtl) +weak_alias (__sqrt, sqrtl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-power5.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-power5.S new file mode 100644 index 0000000000..eacc042850 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-power5.S @@ -0,0 +1,26 @@ +/* sqrtf function. PowerPC32/POWER5 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __sqrtf __sqrtf_power5 + +#include <sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf_compat.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-ppc32.S new file mode 100644 index 0000000000..72191fc9a5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-ppc32.S @@ -0,0 +1,26 @@ +/* sqrtf function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __sqrtf __sqrtf_ppc32 + +#include <sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf_compat.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat.c new file mode 100644 index 0000000000..bbab4d4f93 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat.c @@ -0,0 +1,32 @@ +/* Multiple versions of sqrtf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__sqrtf) __sqrtf_ppc32 attribute_hidden; +extern __typeof (__sqrtf) __sqrtf_power5 attribute_hidden; + +libc_ifunc (__sqrtf, + (hwcap & PPC_FEATURE_POWER5) + ? __sqrtf_power5 + : __sqrtf_ppc32); + +weak_alias (__sqrtf, sqrtf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S new file mode 100644 index 0000000000..d16dbb8406 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S @@ -0,0 +1,46 @@ +/* Round double to long int. PowerPC32 on PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long long int[r3, r4] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + fctid fp13,fp1 + stfd fp13,8(r1) + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop + lwz r3,8+HIWORD(r1) + lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llrint) + +weak_alias (__llrint, llrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S new file mode 100644 index 0000000000..9c3dd77863 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S @@ -0,0 +1,38 @@ +/* Round float to long int. PowerPC32 on PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* long long int[r3, r4] __llrintf (float x[fp1]) */ +ENTRY (__llrintf) + CALL_MCOUNT + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + fctid fp13,fp1 + stfd fp13,8(r1) + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop + lwz r3,8+HIWORD(r1) + lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llrintf) + +weak_alias (__llrintf, llrintf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S new file mode 100644 index 0000000000..24bd533748 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S @@ -0,0 +1,106 @@ +/* llround function. PowerPC32 on PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst8,"aM",@progbits,8 + .align 3 + .LC0: .long (52+127)<<23 /* 0x1p+52 */ + .long (-1+127)<<23 /* 0.5 */ + + .section ".text" + +/* long [r3] lround (float x [fp1]) + IEEE 1003.1 lround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "round to Nearest" mode. Instead we set + "round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. + + It is necessary to detect when x is (+-)0x1.fffffffffffffp-2 + because adding +-0.5 in this case will cause an erroneous shift, + carry and round. We simply return 0 if 0.5 > x > -0.5. Likewise + if x is and odd number between +-(2^52 and 2^53-1) a shift and + carry will erroneously round if biased with +-0.5. Therefore if x + is greater/less than +-2^52 we don't need to bias the number with + +-0.5. */ + +ENTRY (__llround) + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + addi r9,r9,.LC0-got_label@l + mtlr r11 + cfi_same_value (lr) + lfs fp9,0(r9) + lfs fp10,4(r9) +#else + lis r9,.LC0@ha + lfs fp9,.LC0@l(r9) /* Load 2^52 into fpr9. */ + lfs fp10,.LC0@l+4(r9) /* Load 0.5 into fpr10. */ +#endif + fabs fp2,fp1 /* Get the absolute value of x. */ + fsub fp12,fp10,fp10 /* Compute 0.0 into fpr12. */ + fcmpu cr6,fp2,fp10 /* if |x| < 0.5 */ + fcmpu cr7,fp2,fp9 /* if |x| >= 2^52 */ + fcmpu cr1,fp1,fp12 /* x is negative? x < 0.0 */ + blt- cr6,.Lretzero /* 0.5 > x < -0.5 so just return 0. */ + bge- cr7,.Lnobias /* 2^52 > x < -2^52 just convert with no bias. */ + fadd fp3,fp2,fp10 /* |x|+=0.5 bias to prepare to round. */ + bge cr1,.Lconvert /* x is positive so don't negate x. */ + fnabs fp3,fp3 /* -(|x|+=0.5) */ +.Lconvert: + fctidz fp4,fp3 /* Convert to Integer double word round toward 0. */ + stfd fp4,8(r1) + nop + nop + nop + lwz r3,8+HIWORD(r1) /* Load return as integer. */ + lwz r4,8+LOWORD(r1) +.Lout: + addi r1,r1,16 + blr +.Lretzero: /* 0.5 > x > -0.5 */ + li r3,0 /* return 0. */ + li r4,0 + b .Lout +.Lnobias: + fmr fp3,fp1 + b .Lconvert + END (__llround) + +weak_alias (__llround, llround) + +strong_alias (__llround, __llroundf) +weak_alias (__llround, llroundf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S new file mode 100644 index 0000000000..72d6181541 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S @@ -0,0 +1 @@ +/* __llroundf is in s_llround.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt_compat.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt_compat.S new file mode 100644 index 0000000000..bb896a33cd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt_compat.S @@ -0,0 +1,108 @@ +/* sqrt function. PowerPC32 version. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* double [fp1] sqrt (double x [fp1]) + Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). + The fsqrt instruction generates the correct value for all inputs and + sets the appropriate floating point exceptions. Extended checking is + only needed to set errno (via __kernel_standard) if the input value + is negative. + + The fsqrt will set FPCC and FU (Floating Point Unordered or NaN + to indicated that the input value was negative or NaN. Use Move to + Condition Register from FPSCR to copy the FPCC field to cr1. The + branch on summary overflow transfers control to w_sqrt to process + any error conditions. Otherwise we can return the result directly. + + This part of the function is a leaf routine, so no need to stack a + frame or execute prologue/epilogue code. This means it is safe to + transfer directly to w_sqrt as long as the input value (f1) is + preserved. Putting the sqrt result into f2 (double parameter 2) + allows passing both the input value and sqrt result into the extended + wrapper so there is no need to recompute. + + This tactic avoids the overhead of stacking a frame for the normal + (non-error) case. Until gcc supports prologue shrink-wrapping + this is the best we can do. */ + + .section ".text" + .machine power4 +EALIGN (__sqrt, 5, 0) + fsqrt fp2,fp1 + mcrfs cr1,4 + bso- cr1,.Lw_sqrt + fmr fp1,fp2 + blr + .align 4 +.Lw_sqrt: + mflr r0 + stwu r1,-16(r1) + cfi_adjust_cfa_offset(16) + fmr fp12,fp2 + stw r0,20(r1) + stw r30,8(r1) + cfi_offset(lr,20-16) + cfi_offset(r30,8-16) +#ifdef SHARED + SETUP_GOT_ACCESS(r30,got_label) + addis r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +#else + lis r9,_LIB_VERSION@ha + lwz r0,_LIB_VERSION@l(r9) +#endif +/* if (_LIB_VERSION == _IEEE_) return z; */ + cmpwi cr7,r0,-1 + beq- cr7,.L4 +/* if (x != x) return z; !isnan*/ + fcmpu cr7,fp1,fp1 + bne- cr7,.L4 +/* if (x < 0.0) + return __kernel_standard (x, x, 26) */ + fmr fp2,fp1 + fabs fp0,fp1 + li r3,26 + fcmpu cr7,fp1,fp0 + bne- cr7,.L11 +.L4: + lwz r0,20(r1) + fmr fp1,fp12 + lwz r30,8(r1) + addi r1,r1,16 + mtlr r0 + blr +.L11: + bl __kernel_standard@plt + fmr fp12,fp1 + b .L4 + END (__sqrt) + +weak_alias (__sqrt, sqrt) + +#ifdef NO_LONG_DOUBLE +weak_alias (__sqrt, sqrtl) +strong_alias (__sqrt, __sqrtl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf_compat.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf_compat.S new file mode 100644 index 0000000000..c304ab5ca2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf_compat.S @@ -0,0 +1,100 @@ +/* sqrtf function. PowerPC32 version. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* float [fp1] sqrts (float x [fp1]) + Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). + The fsqrts instruction generates the correct value for all inputs and + sets the appropriate floating point exceptions. Extended checking is + only needed to set errno (via __kernel_standard) if the input value + is negative. + + The fsqrts will set FPCC and FU (Floating Point Unordered or NaN + to indicated that the input value was negative or NaN. Use Move to + Condition Register from FPSCR to copy the FPCC field to cr1. The + branch on summary overflow transfers control to w_sqrt to process + any error conditions. Otherwise we can return the result directly. + + This part of the function is a leaf routine, so no need to stack a + frame or execute prologue/epilogue code. This means it is safe to + transfer directly to w_sqrt as long as the input value (f1) is + preserved. Putting the sqrt result into f2 (float parameter 2) + allows passing both the input value and sqrt result into the extended + wrapper so there is no need to recompute. + + This tactic avoids the overhead of stacking a frame for the normal + (non-error) case. Until gcc supports prologue shrink-wrapping + this is the best we can do. */ + + .section ".text" + .machine power4 +EALIGN (__sqrtf, 5, 0) + fsqrts fp2,fp1 + mcrfs cr1,4 + bso- cr1,.Lw_sqrtf + fmr fp1,fp2 + blr + .align 4 +.Lw_sqrtf: + mflr r0 + stwu r1,-16(r1) + cfi_adjust_cfa_offset(16) + fmr fp12,fp2 + stw r0,20(r1) + stw r30,8(r1) + cfi_offset(lr,20-16) + cfi_offset(r30,8-16) +#ifdef SHARED + SETUP_GOT_ACCESS(r30,got_label) + addis r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +#else + lis r9,_LIB_VERSION@ha + lwz r0,_LIB_VERSION@l(r9) +#endif +/* if (_LIB_VERSION == _IEEE_) return z; */ + cmpwi cr7,r0,-1 + beq- cr7,.L4 +/* if (x != x, 0) return z; !isnan */ + fcmpu cr7,fp1,fp1 + bne- cr7,.L4 +/* if (x < 0.0) + return __kernel_standard (x, x, 126) */ + fmr fp2,fp1 + fabs fp0,fp1 + li r3,126 + fcmpu cr7,1,0 + bne- cr7,.L11 +.L4: + lwz r0,20(r1) + fmr fp1,fp12 + lwz r30,8(r1) + addi r1,r1,16 + mtlr r0 + blr +.L11: + bl __kernel_standard@plt + fmr fp12,fp1 + b .L4 + END (__sqrtf) + +weak_alias (__sqrtf, sqrtf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/hp-timing.h b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/hp-timing.h new file mode 100644 index 0000000000..93cce4625a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/hp-timing.h @@ -0,0 +1,54 @@ +/* High precision, low overhead timing functions. powerpc64 version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _HP_TIMING_H +#define _HP_TIMING_H 1 + +/* We always assume having the timestamp register. */ +#define HP_TIMING_AVAIL (1) +#define HP_SMALL_TIMING_AVAIL (1) + +/* We indeed have inlined functions. */ +#define HP_TIMING_INLINE (1) + +/* We use 64bit values for the times. */ +typedef unsigned long long int hp_timing_t; + +/* That's quite simple. Use the `mftb' instruction. Note that the value + might not be 100% accurate since there might be some more instructions + running in this moment. This could be changed by using a barrier like + 'lwsync' right before the `mftb' instruction. But we are not interested + in accurate clock cycles here so we don't do this. */ + +#define HP_TIMING_NOW(Var) \ + do { \ + unsigned int hi, lo, tmp; \ + __asm__ __volatile__ ("1: mfspr %0,269;" \ + " mfspr %1,268;" \ + " mfspr %2,269;" \ + " cmpw %0,%2;" \ + " bne 1b;" \ + : "=&r" (hi), "=&r" (lo), "=&r" (tmp) \ + : : "cr0"); \ + Var = ((hp_timing_t) hi << 32) | lo; \ + } while (0) + +#include <hp-timing-common.h> + +#endif /* hp-timing.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcmp.S new file mode 100644 index 0000000000..c6270d347f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcmp.S @@ -0,0 +1,1375 @@ +/* Optimized strcmp implementation for PowerPC32. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + + .machine power4 +EALIGN (memcmp, 4, 0) + CALL_MCOUNT + +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r8 /* next word in s1 */ +#define rWORD4 r9 /* next word in s2 */ +#define rWORD5 r10 /* next word in s1 */ +#define rWORD6 r11 /* next word in s2 */ +#define rWORD7 r30 /* next word in s1 */ +#define rWORD8 r31 /* next word in s2 */ + + xor r0, rSTR2, rSTR1 + cmplwi cr6, rN, 0 + cmplwi cr1, rN, 12 + clrlwi. r0, r0, 30 + clrlwi r12, rSTR1, 30 + cmplwi cr5, r12, 0 + beq- cr6, L(zeroLength) + dcbt 0, rSTR1 + dcbt 0, rSTR2 +/* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) + stwu 1, -64(r1) + cfi_adjust_cfa_offset(64) + stw rWORD8, 48(r1) + stw rWORD7, 44(r1) + cfi_offset(rWORD8, (48-64)) + cfi_offset(rWORD7, (44-64)) + bne L(unaligned) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. r12 contains the low order + 2 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then we are already word + aligned and can perform the word aligned loop. + + Otherwise we know the two strings have the same alignment (but not + yet word aligned). So we force the string addresses to the next lower + word boundary and special case this first word using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (word aligned) compare loop, starting at the second word, + we need to adjust the length (rN) and special case the loop + versioning for the first word. This ensures that the loop count is + correct and the first word (shifted) is in the expected register pair. */ + .align 4 +L(samealignment): + clrrwi rSTR1, rSTR1, 2 + clrrwi rSTR2, rSTR2, 2 + beq cr5, L(Waligned) + add rN, rN, r12 + slwi rWORD6, r12, 3 + srwi r0, rN, 4 /* Divide by 16 */ + andi. r12, rN, 12 /* Get the word remainder */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) +#endif + cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + clrlwi rN, rN, 30 + beq L(dPs4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + +/* Remainder is 4 */ + .align 3 +L(dsP1): + slw rWORD5, rWORD1, rWORD6 + slw rWORD6, rWORD2, rWORD6 + cmplw cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + b L(dP1e) +/* Remainder is 8 */ + .align 4 +L(dPs2): + slw rWORD5, rWORD1, rWORD6 + slw rWORD6, rWORD2, rWORD6 + cmplw cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 4(rSTR1) + lwz rWORD8, 4(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + b L(dP2e) +/* Remainder is 12 */ + .align 4 +L(dPs3): + slw rWORD3, rWORD1, rWORD6 + slw rWORD4, rWORD2, rWORD6 + cmplw cr1, rWORD3, rWORD4 + b L(dP3e) +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(dPs4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + slw rWORD1, rWORD1, rWORD6 + slw rWORD2, rWORD2, rWORD6 + cmplw cr7, rWORD1, rWORD2 + b L(dP4e) + +/* At this point we know both strings are word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(Waligned): + andi. r12, rN, 12 /* Get the word remainder */ + srwi r0, rN, 4 /* Divide by 16 */ + cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + clrlwi rN, rN, 30 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) + +/* Remainder is 4 */ + .align 4 +L(dP1): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 0(rSTR1) + lwz rWORD6, 0(rSTR2) +#endif + cmplw cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 +L(dP1e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5x) + bne cr7, L(dLcr7x) + +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 16(rSTR1) + lwzu rWORD8, 16(rSTR2) +#endif + bne cr1, L(dLcr1) + cmplw cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) + .align 3 +L(dP1x): + slwi. r12, rN, 3 + bne cr5, L(dLcr5x) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bne L(d00) + li rRTN, 0 + blr + +/* Remainder is 8 */ + .align 4 + cfi_adjust_cfa_offset(64) +L(dP2): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 0(rSTR1) + lwz rWORD6, 0(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 4(rSTR1) + lwz rWORD8, 4(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 +L(dP2e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 8(rSTR1) + lwz rWORD2, 8(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 12(rSTR1) + lwz rWORD4, 12(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#endif + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) +/* Again we are on a early exit path (16-23 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP2x): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 4(rSTR1) + lwz rWORD4, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + slwi. r12, rN, 3 + bne cr6, L(dLcr6x) +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#endif + bne cr1, L(dLcr1x) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bne L(d00) + li rRTN, 0 + blr + +/* Remainder is 12 */ + .align 4 + cfi_adjust_cfa_offset(64) +L(dP3): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 0(rSTR1) + lwz rWORD4, 0(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 +L(dP3e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 4(rSTR1) + lwz rWORD6, 4(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 8(rSTR1) + lwz rWORD8, 8(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 12(rSTR1) + lwz rWORD2, 12(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) +/* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP3x): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 8(rSTR1) + lwz rWORD2, 8(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + slwi. r12, rN, 3 + bne cr1, L(dLcr1x) +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr6, L(dLcr6x) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ + bne cr7, L(dLcr7x) + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bne L(d00) + li rRTN, 0 + blr + +/* Count is a multiple of 16, remainder is 0 */ + .align 4 + cfi_adjust_cfa_offset(64) +L(dP4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 +L(dP4e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 4(rSTR1) + lwz rWORD4, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 8(rSTR1) + lwz rWORD6, 8(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 12(rSTR1) + lwzu rWORD8, 12(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(dLoop): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +L(dLoop1): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) +L(dLoop2): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) +L(dLoop3): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 16(rSTR1) + lwzu rWORD8, 16(rSTR2) +#endif + bne- cr1, L(dLcr1) + cmplw cr7, rWORD1, rWORD2 + bdnz+ L(dLoop) + +L(dL4): + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + cmplw cr5, rWORD7, rWORD8 +L(d44): + bne cr7, L(dLcr7) +L(d34): + bne cr1, L(dLcr1) +L(d24): + bne cr6, L(dLcr6) +L(d14): + slwi. r12, rN, 3 + bne cr5, L(dLcr5) +L(d04): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ + beq L(zeroLength) +/* At this point we have a remainder of 1 to 3 bytes to compare. Since + we are aligned it is safe to load the whole word, and use + shift right to eliminate bits beyond the compare length. */ +L(d00): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + srw rWORD1, rWORD1, rN + srw rWORD2, rWORD2, rN + sub rRTN, rWORD1, rWORD2 + blr + + .align 4 + cfi_adjust_cfa_offset(64) +L(dLcr7): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) +L(dLcr7x): + li rRTN, 1 + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bgtlr cr7 + li rRTN, -1 + blr + .align 4 + cfi_adjust_cfa_offset(64) +L(dLcr1): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) +L(dLcr1x): + li rRTN, 1 + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bgtlr cr1 + li rRTN, -1 + blr + .align 4 + cfi_adjust_cfa_offset(64) +L(dLcr6): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) +L(dLcr6x): + li rRTN, 1 + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bgtlr cr6 + li rRTN, -1 + blr + .align 4 + cfi_adjust_cfa_offset(64) +L(dLcr5): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) +L(dLcr5x): + li rRTN, 1 + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bgtlr cr5 + li rRTN, -1 + blr + + .align 4 +L(bytealigned): + mtctr rN /* Power4 wants mtctr 1st in dispatch group */ + +/* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) is 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. Effectively + the first iteration of the loop only serves to load operands and + branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. */ + + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz- L(b11) + cmplw cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz- L(b12) + cmplw cr1, rWORD3, rWORD4 + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz- L(b13) + .align 4 +L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + bne- cr7, L(bLcr7) + + cmplw cr6, rWORD5, rWORD6 + bdz- L(b3i) + + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + bne- cr1, L(bLcr1) + + cmplw cr7, rWORD1, rWORD2 + bdz- L(b2i) + + lbzu rWORD5, 1(rSTR1) + lbzu rWORD6, 1(rSTR2) + bne- cr6, L(bLcr6) + + cmplw cr1, rWORD3, rWORD4 + bdnz+ L(bLoop) + +/* We speculatively loading bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. In this + case the loop will exit early (before the all pending bytes are + tested. In this case we must complete the pending operations + before returning. */ +L(b1i): + bne- cr7, L(bLcr7) + bne- cr1, L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne- cr6, L(bLcr6) + bne- cr7, L(bLcr7) + b L(bx34) + .align 4 +L(b3i): + bne- cr1, L(bLcr1) + bne- cr6, L(bLcr6) + b L(bx12) + .align 4 +L(bLcr7): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr +L(bLcr1): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr +L(bLcr6): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + +L(b13): + bne- cr7, L(bx12) + bne- cr1, L(bx34) +L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop +L(b12): + bne- cr7, L(bx12) +L(bx34): + sub rRTN, rWORD3, rWORD4 + blr +L(b11): +L(bx12): + sub rRTN, rWORD1, rWORD2 + blr + .align 4 +L(zeroLength): + li rRTN, 0 + blr + + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. r12 contains the low order + 2 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then rStr1 is word aligned and can + perform the Wunaligned loop. + + Otherwise we know that rSTR1 is not already word aligned yet. + So we can force the string addresses to the next lower word + boundary and special case this first word using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (Wualigned) compare loop, starting at the second word, + we need to adjust the length (rN) and special case the loop + versioning for the first W. This ensures that the loop count is + correct and the first W (shifted) is in the expected resister pair. */ +#define rSHL r29 /* Unaligned shift left count. */ +#define rSHR r28 /* Unaligned shift right count. */ +#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ +#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ +#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ +#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */ + cfi_adjust_cfa_offset(64) +L(unaligned): + stw rSHL, 40(r1) + cfi_offset(rSHL, (40-64)) + clrlwi rSHL, rSTR2, 30 + stw rSHR, 36(r1) + cfi_offset(rSHR, (36-64)) + beq cr5, L(Wunaligned) + stw rWORD8_SHIFT, 32(r1) + cfi_offset(rWORD8_SHIFT, (32-64)) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 W. */ + sub rWORD8_SHIFT, rSTR2, r12 +/* But do not attempt to address the W before that W that contains + the actual start of rSTR2. */ + clrrwi rSTR2, rSTR2, 2 + stw rWORD2_SHIFT, 28(r1) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (W aligned) start of rSTR1. */ + clrlwi rSHL, rWORD8_SHIFT, 30 + clrrwi rSTR1, rSTR1, 2 + stw rWORD4_SHIFT, 24(r1) + slwi rSHL, rSHL, 3 + cmplw cr5, rWORD8_SHIFT, rSTR2 + add rN, rN, r12 + slwi rWORD6, r12, 3 + stw rWORD6_SHIFT, 20(r1) + cfi_offset(rWORD2_SHIFT, (28-64)) + cfi_offset(rWORD4_SHIFT, (24-64)) + cfi_offset(rWORD6_SHIFT, (20-64)) + subfic rSHR, rSHL, 32 + srwi r0, rN, 4 /* Divide by 16 */ + andi. r12, rN, 12 /* Get the W remainder */ +/* We normally need to load 2 Ws to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a W where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. */ + li rWORD8, 0 + blt cr5, L(dus0) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD8, 0(rSTR2) + addi rSTR2, rSTR2, 4 +#endif + slw rWORD8, rWORD8, rSHL + +L(dus0): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) +#endif + cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + srw r12, rWORD2, rSHR + clrlwi rN, rN, 30 + beq L(duPs4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + +/* Remainder is 4 */ + .align 4 +L(dusP1): + slw rWORD8_SHIFT, rWORD2, rSHL + slw rWORD7, rWORD1, rWORD6 + slw rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) +/* At this point we exit early with the first word compare + complete and remainder of 0 to 3 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmplw cr5, rWORD7, rWORD8 + slwi. rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 4(rSTR2) +#endif + srw r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 8 */ + .align 4 +L(duPs2): + slw rWORD6_SHIFT, rWORD2, rSHL + slw rWORD5, rWORD1, rWORD6 + slw rWORD6, rWORD8, rWORD6 + b L(duP2e) +/* Remainder is 12 */ + .align 4 +L(duPs3): + slw rWORD4_SHIFT, rWORD2, rSHL + slw rWORD3, rWORD1, rWORD6 + slw rWORD4, rWORD8, rWORD6 + b L(duP3e) +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(duPs4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + or rWORD8, r12, rWORD8 + slw rWORD2_SHIFT, rWORD2, rSHL + slw rWORD1, rWORD1, rWORD6 + slw rWORD2, rWORD8, rWORD6 + b L(duP4e) + +/* At this point we know rSTR1 is word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(Wunaligned): + stw rWORD8_SHIFT, 32(r1) + clrrwi rSTR2, rSTR2, 2 + stw rWORD2_SHIFT, 28(r1) + srwi r0, rN, 4 /* Divide by 16 */ + stw rWORD4_SHIFT, 24(r1) + andi. r12, rN, 12 /* Get the W remainder */ + stw rWORD6_SHIFT, 20(r1) + cfi_offset(rWORD8_SHIFT, (32-64)) + cfi_offset(rWORD2_SHIFT, (28-64)) + cfi_offset(rWORD4_SHIFT, (24-64)) + cfi_offset(rWORD6_SHIFT, (20-64)) + slwi rSHL, rSHL, 3 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD6, 0, rSTR2 + addi rSTR2, rSTR2, 4 + lwbrx rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD6, 0(rSTR2) + lwzu rWORD8, 4(rSTR2) +#endif + cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + clrlwi rN, rN, 30 + subfic rSHR, rSHL, 32 + slw rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(duP3) + beq cr1, L(duP2) + +/* Remainder is 4 */ + .align 4 +L(duP1): + srw r12, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + addi rSTR1, rSTR1, 4 +#else + lwz rWORD7, 0(rSTR1) +#endif + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) +L(duP1e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + srw r0, rWORD2, rSHR + slw rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + srw r12, rWORD4, rSHR + slw rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) + or rWORD4, r12, rWORD2_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + srw r0, rWORD6, rSHR + slw rWORD6_SHIFT, rWORD6, rSHL + bne cr7, L(duLcr7) + or rWORD6, r0, rWORD4_SHIFT + cmplw cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first word compare + complete and remainder of 0 to 3 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmplw cr5, rWORD7, rWORD8 + slwi. rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 8(rSTR2) +#endif + srw r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 8 */ + .align 4 +L(duP2): + srw r0, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + addi rSTR1, rSTR1, 4 +#else + lwz rWORD5, 0(rSTR1) +#endif + or rWORD6, r0, rWORD6_SHIFT + slw rWORD6_SHIFT, rWORD8, rSHL +L(duP2e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 4(rSTR1) + lwz rWORD8, 4(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + srw r12, rWORD8, rSHR + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 8(rSTR1) + lwz rWORD2, 8(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srw r0, rWORD2, rSHR + slw rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 12(rSTR1) + lwz rWORD4, 12(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) + srw r12, rWORD4, rSHR + slw rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#endif + cmplw cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmplw cr5, rWORD7, rWORD8 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#endif + bne cr6, L(duLcr6) + slwi. rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 4(rSTR2) +#endif + srw r0, rWORD2, rSHR + b L(dutrim) + +/* Remainder is 12 */ + .align 4 +L(duP3): + srw r12, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + addi rSTR1, rSTR1, 4 +#else + lwz rWORD3, 0(rSTR1) +#endif + slw rWORD4_SHIFT, rWORD8, rSHL + or rWORD4, r12, rWORD6_SHIFT +L(duP3e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 4(rSTR1) + lwz rWORD6, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + srw r0, rWORD6, rSHR + slw rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 8(rSTR1) + lwz rWORD8, 8(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srw r12, rWORD8, rSHR + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 12(rSTR1) + lwz rWORD2, 12(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srw r0, rWORD2, rSHR + slw rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + cmplw cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif +#if 0 +/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + slwi. rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 4(rSTR2) +#endif + srw r0, rWORD2, rSHR + b L(dutrim) + +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(duP4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + srw r0, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + addi rSTR1, rSTR1, 4 +#else + lwz rWORD1, 0(rSTR1) +#endif + slw rWORD2_SHIFT, rWORD8, rSHL + or rWORD2, r0, rWORD6_SHIFT +L(duP4e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 4(rSTR1) + lwz rWORD4, 4(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + srw r12, rWORD4, rSHR + slw rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 8(rSTR1) + lwz rWORD6, 8(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + bne cr7, L(duLcr7) + srw r0, rWORD6, rSHR + slw rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 12(rSTR1) + lwzu rWORD8, 12(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srw r12, rWORD8, rSHR + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + cmplw cr5, rWORD7, rWORD8 + bdz- L(du24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(duLoop): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + srw r0, rWORD2, rSHR + slw rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +L(duLoop1): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + srw r12, rWORD4, rSHR + slw rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +L(duLoop2): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr7, L(duLcr7) + srw r0, rWORD6, rSHR + slw rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +L(duLoop3): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 16(rSTR1) + lwzu rWORD8, 16(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + bne- cr1, L(duLcr1) + srw r12, rWORD8, rSHR + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + bdnz+ L(duLoop) + +L(duL4): +#if 0 +/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) +#endif + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmplw cr5, rWORD7, rWORD8 +L(du44): + bne cr7, L(duLcr7) +L(du34): + bne cr1, L(duLcr1) +L(du24): + bne cr6, L(duLcr6) +L(du14): + slwi. rN, rN, 3 + bne cr5, L(duLcr5) +/* At this point we have a remainder of 1 to 3 bytes to compare. We use + shift right to eliminate bits beyond the compare length. + This allows the use of word subtract to compute the final result. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rWORD8_SHIFT). */ + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 4(rSTR2) +#endif + srw r0, rWORD2, rSHR + .align 4 +L(dutrim): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 +#else + lwz rWORD1, 4(rSTR1) +#endif + lwz rWORD8, 48(r1) + subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). */ + or rWORD2, r0, rWORD8_SHIFT + lwz rWORD7, 44(r1) + lwz rSHL, 40(r1) + srw rWORD1, rWORD1, rN + srw rWORD2, rWORD2, rN + lwz rSHR, 36(r1) + lwz rWORD8_SHIFT, 32(r1) + sub rRTN, rWORD1, rWORD2 + b L(dureturn26) + .align 4 +L(duLcr7): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr7, L(dureturn29) + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr1): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr6): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr5): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 3 +L(duZeroReturn): + li rRTN, 0 + .align 4 +L(dureturn): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) +L(dureturn29): + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) +L(dureturn27): + lwz rWORD8_SHIFT, 32(r1) +L(dureturn26): + lwz rWORD2_SHIFT, 28(r1) +L(dureturn25): + lwz rWORD4_SHIFT, 24(r1) + lwz rWORD6_SHIFT, 20(r1) + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + blr +END (memcmp) + +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp, bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcopy.h b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcopy.h new file mode 100644 index 0000000000..c76739e390 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcopy.h @@ -0,0 +1,116 @@ +/* memcopy.h -- definitions for memory copy functions. Generic C version. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund (tege@sics.se). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The strategy of the memory functions is: + + 1. Copy bytes until the destination pointer is aligned. + + 2. Copy words in unrolled loops. If the source and destination + are not aligned in the same way, use word memory operations, + but shift and merge two read words before writing. + + 3. Copy the few remaining bytes. + + This is fast on processors that have at least 10 registers for + allocation by GCC, and that can access memory at reg+const in one + instruction. + + I made an "exhaustive" test of this memmove when I wrote it, + exhaustive in the sense that I tried all alignment and length + combinations, with and without overlap. */ + +#include <sysdeps/generic/memcopy.h> + +/* The macros defined in this file are: + + BYTE_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy) + + BYTE_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy) + + WORD_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_remaining, nbytes_to_copy) + + WORD_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_remaining, nbytes_to_copy) + + MERGE(old_word, sh_1, new_word, sh_2) + [I fail to understand. I feel stupid. --roland] +*/ + + +/* Threshold value for when to enter the unrolled loops. */ +#undef OP_T_THRES +#define OP_T_THRES 16 + +/* Copy exactly NBYTES bytes from SRC_BP to DST_BP, + without any assumptions about alignment of the pointers. */ +#undef BYTE_COPY_FWD +#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \ + do \ + { \ + size_t __nbytes = (nbytes); \ + if (__nbytes & 1) \ + { \ + ((byte *) dst_bp)[0] = ((byte *) src_bp)[0]; \ + src_bp += 1; \ + dst_bp += 1; \ + __nbytes -= 1; \ + } \ + while (__nbytes > 0) \ + { \ + byte __x = ((byte *) src_bp)[0]; \ + byte __y = ((byte *) src_bp)[1]; \ + src_bp += 2; \ + __nbytes -= 2; \ + ((byte *) dst_bp)[0] = __x; \ + ((byte *) dst_bp)[1] = __y; \ + dst_bp += 2; \ + } \ + } while (0) + +/* Copy exactly NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR, + beginning at the bytes right before the pointers and continuing towards + smaller addresses. Don't assume anything about alignment of the + pointers. */ +#undef BYTE_COPY_BWD +#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \ + do \ + { \ + size_t __nbytes = (nbytes); \ + if (__nbytes & 1) \ + { \ + src_ep -= 1; \ + dst_ep -= 1; \ + ((byte *) dst_ep)[0] = ((byte *) src_ep)[0]; \ + __nbytes -= 1; \ + } \ + while (__nbytes > 0) \ + { \ + byte __x, __y; \ + src_ep -= 2; \ + __y = ((byte *) src_ep)[1]; \ + __x = ((byte *) src_ep)[0]; \ + dst_ep -= 2; \ + __nbytes -= 2; \ + ((byte *) dst_ep)[1] = __y; \ + ((byte *) dst_ep)[0] = __x; \ + } \ + } while (0) + +/* The powerpc memcpy implementation is safe to use for memmove. */ +#undef MEMCPY_OK_FOR_FWD_MEMMOVE +#define MEMCPY_OK_FOR_FWD_MEMMOVE 1 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcpy.S new file mode 100644 index 0000000000..37bc712dd9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcpy.S @@ -0,0 +1,481 @@ +/* Optimized memcpy implementation for PowerPC32 on PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + + Memcpy handles short copies (< 32-bytes) using a binary move blocks + (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled + with the appropriate combination of byte and halfword load/stores. + There is minimal effort to optimize the alignment of short moves. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination word (4-byte) aligned. Further optimization is + possible when both source and destination are word aligned. + Each case has an optimized unrolled loop. */ + + .machine power4 +EALIGN (memcpy, 5, 0) + CALL_MCOUNT + + stwu 1,-32(1) + cfi_adjust_cfa_offset(32) + stw 30,20(1) + cfi_offset(30,(20-32)) + mr 30,3 + cmplwi cr1,5,31 + stw 31,24(1) + cfi_offset(31,(24-32)) + neg 0,3 + andi. 11,3,3 /* check alignment of dst. */ + clrlwi 0,0,30 /* Number of bytes until the 1st word of dst. */ + clrlwi 10,4,30 /* check alignment of src. */ + cmplwi cr6,5,8 + ble- cr1,.L2 /* If move < 32 bytes use short move code. */ + cmplw cr6,10,11 + mr 12,4 + srwi 9,5,2 /* Number of full words remaining. */ + mtcrf 0x01,0 + mr 31,5 + beq .L0 + + subf 31,0,5 + /* Move 0-3 bytes as needed to get the destination word aligned. */ +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,0f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +0: + clrlwi 10,12,30 /* check alignment of src again. */ + srwi 9,31,2 /* Number of full words remaining. */ + + /* Copy words from source to destination, assuming the destination is + aligned on a word boundary. + + At this point we know there are at least 25 bytes left (32-7) to copy. + The next step is to determine if the source is also word aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. + + Otherwise source and destination are word aligned, and we can use + the optimized word copy loop. */ +.L0: + clrlwi 11,31,30 /* calculate the number of tail bytes */ + mtcrf 0x01,9 + bne- cr6,.L6 /* If source is not word aligned. */ + + /* Move words where destination and source are word aligned. + Use an unrolled loop to copy 4 words (16-bytes) per iteration. + If the copy is not an exact multiple of 16 bytes, 1-3 + words are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-3 bytes. These bytes are + copied a halfword/byte at a time as needed to preserve alignment. */ + + srwi 8,31,4 /* calculate the 16 byte loop count */ + cmplwi cr1,9,4 + cmplwi cr6,11,0 + mr 11,12 + + bf 30,1f + lwz 6,0(12) + lwz 7,4(12) + addi 11,12,8 + mtctr 8 + stw 6,0(3) + stw 7,4(3) + addi 10,3,8 + bf 31,4f + lwz 0,8(12) + stw 0,8(3) + blt cr1,3f + addi 11,12,12 + addi 10,3,12 + b 4f + .align 4 +1: + mr 10,3 + mtctr 8 + bf 31,4f + lwz 6,0(12) + addi 11,12,4 + stw 6,0(3) + addi 10,3,4 + + .align 4 +4: + lwz 6,0(11) + lwz 7,4(11) + lwz 8,8(11) + lwz 0,12(11) + stw 6,0(10) + stw 7,4(10) + stw 8,8(10) + stw 0,12(10) + addi 11,11,16 + addi 10,10,16 + bdnz 4b +3: + clrrwi 0,31,2 + mtcrf 0x01,31 + beq cr6,0f +.L9: + add 3,3,0 + add 12,12,0 + +/* At this point we have a tail of 0-3 bytes and we know that the + destination is word aligned. */ +2: bf 30,1f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + +/* Copy up to 31 bytes. This is divided into two cases 0-8 bytes and + 9-31 bytes. Each case is handled without loops, using binary + (1,2,4,8) tests. + + In the short (0-8 byte) case no attempt is made to force alignment + of either source or destination. The hardware will handle the + unaligned load/stores with small delays for crossing 32- 64-byte, and + 4096-byte boundaries. Since these short moves are unlikely to be + unaligned or cross these boundaries, the overhead to force + alignment is not justified. + + The longer (9-31 byte) move is more likely to cross 32- or 64-byte + boundaries. Since only loads are sensitive to the 32-/64-byte + boundaries it is more important to align the source than the + destination. If the source is not already word aligned, we first + move 1-3 bytes as needed. While the destination and stores may + still be unaligned, this is only an issue for page (4096 byte + boundary) crossing, which should be rare for these short moves. + The hardware handles this case automatically with a small delay. */ + + .align 4 +.L2: + mtcrf 0x01,5 + neg 8,4 + clrrwi 11,4,2 + andi. 0,8,3 + ble cr6,.LE8 /* Handle moves of 0-8 bytes. */ +/* At least 9 bytes left. Get the source word aligned. */ + cmplwi cr1,5,16 + mr 10,5 + mr 12,4 + cmplwi cr6,0,2 + beq .L3 /* If the source is already word aligned skip this. */ +/* Copy 1-3 bytes to get source address word aligned. */ + lwz 6,0(11) + subf 10,0,5 + add 12,4,0 + blt cr6,5f + srwi 7,6,16 + bgt cr6,3f +#ifdef __LITTLE_ENDIAN__ + sth 7,0(3) +#else + sth 6,0(3) +#endif + b 7f + .align 4 +3: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,24 + stb 6,0(3) + sth 7,1(3) +#else + stb 7,0(3) + sth 6,1(3) +#endif + b 7f + .align 4 +5: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,8 +#endif + stb 6,0(3) +7: + cmplwi cr1,10,16 + add 3,3,0 + mtcrf 0x01,10 + .align 4 +.L3: +/* At least 6 bytes left and the source is word aligned. */ + blt cr1,8f +16: /* Move 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 6,8(12) + stw 7,4(3) + lwz 7,12(12) + addi 12,12,16 + stw 6,8(3) + stw 7,12(3) + addi 3,3,16 +8: /* Move 8 bytes. */ + bf 28,4f + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Move 4 bytes. */ + bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Move 2-3 bytes. */ + bf 30,1f + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr +1: /* Move 1 byte. */ + bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + +/* Special case to copy 0-8 bytes. */ + .align 4 +.LE8: + mr 12,4 + bne cr6,4f + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + .align 4 +4: bf 29,2b + lwz 6,0(4) + stw 6,0(3) +6: + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + .align 4 +5: + bf 31,0f + lbz 6,4(4) + stb 6,4(3) + .align 4 +0: + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +.L6: + + /* Copy words where the destination is aligned but the source is + not. Use aligned word loads from the source, shifted to realign + the data, to allow aligned destination stores. + Use an unrolled loop to copy 4 words (16-bytes) per iteration. + A single word is retained for storing at loop exit to avoid walking + off the end of a page within the loop. + If the copy is not an exact multiple of 16 bytes, 1-3 + words are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-3 bytes. These bytes are + copied a halfword/byte at a time as needed to preserve alignment. */ + + + cmplwi cr6,11,0 /* are there tail bytes left ? */ + subf 5,10,12 /* back up src pointer to prev word alignment */ + slwi 10,10,3 /* calculate number of bits to shift 1st word left */ + addi 11,9,-1 /* we move one word after the loop */ + srwi 8,11,2 /* calculate the 16 byte loop count */ + lwz 6,0(5) /* load 1st src word into R6 */ + mr 4,3 + lwz 7,4(5) /* load 2nd src word into R7 */ + mtcrf 0x01,11 + subfic 9,10,32 /* number of bits to shift 2nd word right */ + mtctr 8 + bf 30,1f + + /* there are at least two words to copy, so copy them */ +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else + slw 0,6,10 /* shift 1st src word to left align it in R0 */ + srw 8,7,9 /* shift 2nd src word to right align it in R8 */ +#endif + or 0,0,8 /* or them to get word to store */ + lwz 6,8(5) /* load the 3rd src word */ + stw 0,0(4) /* store the 1st dst word */ +#ifdef __LITTLE_ENDIAN__ + srw 0,7,10 + slw 8,6,9 +#else + slw 0,7,10 /* now left align 2nd src word into R0 */ + srw 8,6,9 /* shift 3rd src word to right align it in R8 */ +#endif + or 0,0,8 /* or them to get word to store */ + lwz 7,12(5) + stw 0,4(4) /* store the 2nd dst word */ + addi 4,4,8 + addi 5,5,16 + bf 31,4f + /* there is a third word to copy, so copy it */ +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else + slw 0,6,10 /* shift 3rd src word to left align it in R0 */ + srw 8,7,9 /* shift 4th src word to right align it in R8 */ +#endif + or 0,0,8 /* or them to get word to store */ + stw 0,0(4) /* store 3rd dst word */ + mr 6,7 + lwz 7,0(5) + addi 5,5,4 + addi 4,4,4 + b 4f + .align 4 +1: +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else + slw 0,6,10 /* shift 1st src word to left align it in R0 */ + srw 8,7,9 /* shift 2nd src word to right align it in R8 */ +#endif + addi 5,5,8 + or 0,0,8 /* or them to get word to store */ + bf 31,4f + mr 6,7 + lwz 7,0(5) + addi 5,5,4 + stw 0,0(4) /* store the 1st dst word */ + addi 4,4,4 + + .align 4 +4: + /* copy 16 bytes at a time */ +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else + slw 0,6,10 + srw 8,7,9 +#endif + or 0,0,8 + lwz 6,0(5) + stw 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srw 0,7,10 + slw 8,6,9 +#else + slw 0,7,10 + srw 8,6,9 +#endif + or 0,0,8 + lwz 7,4(5) + stw 0,4(4) +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else + slw 0,6,10 + srw 8,7,9 +#endif + or 0,0,8 + lwz 6,8(5) + stw 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srw 0,7,10 + slw 8,6,9 +#else + slw 0,7,10 + srw 8,6,9 +#endif + or 0,0,8 + lwz 7,12(5) + stw 0,12(4) + addi 5,5,16 + addi 4,4,16 + bdnz+ 4b +8: + /* calculate and store the final word */ +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else + slw 0,6,10 + srw 8,7,9 +#endif + or 0,0,8 + stw 0,0(4) +3: + clrrwi 0,31,2 + mtcrf 0x01,31 + bne cr6,.L9 /* If the tail is 0 bytes we are done! */ + + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr +END (memcpy) + +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memset.S new file mode 100644 index 0000000000..25319f7233 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memset.S @@ -0,0 +1,226 @@ +/* Optimized memset implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. + + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (1024 bits). There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. */ + + .machine power4 +EALIGN (memset, 5, 0) + CALL_MCOUNT + +#define rTMP r0 +#define rRTN r3 /* Initial value of 1st argument. */ +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ +#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ +#define rMEMP2 r8 + +#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */ +#define rCLS r8 /* Cache line size (known to be 128). */ +#define rCLM r9 /* Cache line size mask to check for cache alignment. */ +L(_memset): +/* Take care of case for size <= 4. */ + cmplwi cr1, rLEN, 4 + andi. rALIGN, rMEMP0, 3 + mr rMEMP, rMEMP0 + ble- cr1, L(small) + +/* Align to word boundary. */ + cmplwi cr5, rLEN, 31 + insrwi rCHR, rCHR, 8, 16 /* Replicate byte to halfword. */ + beq+ L(aligned) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 4 + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): + sth rCHR, -2(rMEMP) + +/* Handle the case of size < 31. */ +L(aligned): + mtcrf 0x01, rLEN + insrwi rCHR, rCHR, 16, 0 /* Replicate halfword to word. */ + ble cr5, L(medium) +/* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x1C + subfic rALIGN, rALIGN, 0x20 + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stw rCHR, -4(rMEMP2) + stwu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + stw rCHR, -4(rMEMP2) + stw rCHR, -8(rMEMP2) + stw rCHR, -12(rMEMP2) + stwu rCHR, -16(rMEMP2) +L(a2): bf 29, L(caligned) + stw rCHR, -4(rMEMP2) + +/* Now aligned to a 32 byte boundary. */ +L(caligned): + cmplwi cr1, rCHR, 0 + clrrwi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN + beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */ +L(nondcbz): + srwi rTMP, rALIGN, 5 + mtctr rTMP + beq L(medium) /* We may not actually get to do a full line. */ + clrlwi. rLEN, rLEN, 27 + add rMEMP, rMEMP, rALIGN + li rNEG64, -0x40 + bdz L(cloopdone) + + .align 4 +L(c3): dcbtst rNEG64, rMEMP + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stw rCHR, -16(rMEMP) + stw rCHR, -20(rMEMP) + stw rCHR, -24(rMEMP) + stw rCHR, -28(rMEMP) + stwu rCHR, -32(rMEMP) + bdnz L(c3) +L(cloopdone): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stw rCHR, -16(rMEMP) + cmplwi cr1, rLEN, 16 + stw rCHR, -20(rMEMP) + stw rCHR, -24(rMEMP) + stw rCHR, -28(rMEMP) + stwu rCHR, -32(rMEMP) + beqlr + add rMEMP, rMEMP, rALIGN + b L(medium_tail2) + + .align 5 +/* Clear lines of memory in 128-byte chunks. */ +L(zloopstart): +/* If the remaining length is less the 32 bytes, don't bother getting + the cache line size. */ + beq L(medium) + li rCLS,128 /* cache line size is 128 */ + dcbt 0,rMEMP +L(getCacheAligned): + cmplwi cr1,rLEN,32 + andi. rTMP,rMEMP,127 + blt cr1,L(handletail32) + beq L(cacheAligned) + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + stw rCHR,-32(rMEMP) + stw rCHR,-28(rMEMP) + stw rCHR,-24(rMEMP) + stw rCHR,-20(rMEMP) + stw rCHR,-16(rMEMP) + stw rCHR,-12(rMEMP) + stw rCHR,-8(rMEMP) + stw rCHR,-4(rMEMP) + b L(getCacheAligned) + +/* Now we are aligned to the cache line and can use dcbz. */ + .align 4 +L(cacheAligned): + cmplw cr1,rLEN,rCLS + blt cr1,L(handletail32) + dcbz 0,rMEMP + subf rLEN,rCLS,rLEN + add rMEMP,rMEMP,rCLS + b L(cacheAligned) + +/* We are here because the cache line size was set and the remainder + (rLEN) is less than the actual cache line size. + So set up the preconditions for L(nondcbz) and go there. */ +L(handletail32): + clrrwi. rALIGN, rLEN, 5 + b L(nondcbz) + + .align 5 +L(small): +/* Memset of 4 bytes or less. */ + cmplwi cr5, rLEN, 1 + cmplwi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + cmplwi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt- 29, L(medium_29t) +L(medium_29f): + bge- cr1, L(medium_27t) + bflr- 28 + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt- cr1, L(medium_27f) +L(medium_27t): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stwu rCHR, -16(rMEMP) +L(medium_27f): + bflr- 28 +L(medium_28t): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + blr +END (memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile new file mode 100644 index 0000000000..bd9d360efa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile @@ -0,0 +1,30 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + memcpy-ppc32 memcmp-power7 memcmp-ppc32 memset-power7 \ + memset-power6 memset-ppc32 bzero-power7 bzero-power6 \ + bzero-ppc32 mempcpy-power7 mempcpy-ppc32 memchr-power7 \ + memchr-ppc32 memrchr-power7 memrchr-ppc32 rawmemchr-power7 \ + rawmemchr-ppc32 strlen-power7 strlen-ppc32 strnlen-power7 \ + strnlen-ppc32 strncmp-power7 strncmp-ppc32 \ + strcasecmp-power7 strcasecmp_l-power7 strncase-power7 \ + strncase_l-power7 strchrnul-power7 strchrnul-ppc32 \ + strchr-power7 strchr-ppc32 \ + wordcopy-power7 wordcopy-ppc32 \ + memmove-power7 memmove-ppc + +CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops +CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops +endif + +ifeq ($(subdir),wcsmbs) +sysdep_routines += wcschr-power7 wcschr-power6 wcschr-ppc32 \ + wcsrchr-power7 wcsrchr-power6 wcsrchr-ppc32 \ + wcscpy-power7 wcscpy-power6 wcscpy-ppc32 + +CFLAGS-wcschr-power7.c += -mcpu=power7 +CFLAGS-wcschr-power6.c += -mcpu=power6 +CFLAGS-wcsrchr-power7.c += -mcpu=power7 +CFLAGS-wcsrchr-power6.c += -mcpu=power6 +CFLAGS-wcscpy-power7.c += -mcpu=power7 +CFLAGS-wcscpy-power6.c += -mcpu=power6 +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-power6.S new file mode 100644 index 0000000000..4e000309cf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-power6.S @@ -0,0 +1,26 @@ +/* Optimized bzero implementation for PowerPC32/POWER6. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__bzero_power6) + mr r5,r4 + li r4,0 + b __memset_power6@local +END (__bzero_power6) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-power7.S new file mode 100644 index 0000000000..580da55166 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-power7.S @@ -0,0 +1,26 @@ +/* Optimized bzero implementation for PowerPC32/POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__bzero_power7) + mr r5,r4 + li r4,0 + b __memset_power7@local +END (__bzero_power7) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-ppc32.S new file mode 100644 index 0000000000..33c69cbfb9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-ppc32.S @@ -0,0 +1,35 @@ +/* Optimized bzero implementation for PowerPC32/PPC32. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* memset ifunc selector is not built for static and memset@local + for shared builds makes the linker point the call to the ifunc + selector. */ +#ifdef SHARED +# define MEMSET __memset_ppc +#else +# define MEMSET memset +#endif + +ENTRY (__bzero_ppc) + mr r5,r4 + li r4,0 + b MEMSET@local +END (__bzero_ppc) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c new file mode 100644 index 0000000000..865920ee26 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c @@ -0,0 +1,37 @@ +/* Multiple versions of bzero. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# include <string.h> +# include <strings.h> +# include "init-arch.h" + +extern __typeof (bzero) __bzero_ppc attribute_hidden; +extern __typeof (bzero) __bzero_power6 attribute_hidden; +extern __typeof (bzero) __bzero_power7 attribute_hidden; + +libc_ifunc (__bzero, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __bzero_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __bzero_power6 + : __bzero_ppc); + +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c new file mode 100644 index 0000000000..1caf15a07d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c @@ -0,0 +1,224 @@ +/* Enumerate available IFUNC implementations of a function. PowerPC32 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <ldsodefs.h> +#include <ifunc-impl-list.h> + +/* Maximum number of IFUNC implementations. */ +#define MAX_IFUNC 6 + +size_t +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + size_t max) +{ + assert (max >= MAX_IFUNC); + + size_t i = 0; + + unsigned long int hwcap = GLRO(dl_hwcap); + /* hwcap contains only the latest supported ISA, the code checks which is + and fills the previous supported ones. */ + if (hwcap & PPC_FEATURE_ARCH_2_06) + hwcap |= PPC_FEATURE_ARCH_2_05 | PPC_FEATURE_POWER5_PLUS | + PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_ARCH_2_05) + hwcap |= PPC_FEATURE_POWER5_PLUS | PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_POWER5_PLUS) + hwcap |= PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_POWER5) + hwcap |= PPC_FEATURE_POWER4; + +#ifdef SHARED + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c. */ + IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_HAS_VSX, + __memcpy_power7) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_06, + __memcpy_a2) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_05, + __memcpy_power6) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_CELL_BE, + __memcpy_cell) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c. */ + IFUNC_IMPL (i, name, memmove, + IFUNC_IMPL_ADD (array, i, memmove, hwcap & PPC_FEATURE_HAS_VSX, + __memmove_power7) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memset.c. */ + IFUNC_IMPL (i, name, memset, + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_HAS_VSX, + __memset_power7) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_05, + __memset_power6) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c. */ + IFUNC_IMPL (i, name, bzero, + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_HAS_VSX, + __bzero_power7) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_ARCH_2_05, + __bzero_power6) + IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strlen.c. */ + IFUNC_IMPL (i, name, strlen, + IFUNC_IMPL_ADD (array, i, strlen, hwcap & PPC_FEATURE_HAS_VSX, + __strlen_power7) + IFUNC_IMPL_ADD (array, i, strlen, 1, + __strlen_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strnlen.c. */ + IFUNC_IMPL (i, name, strnlen, + IFUNC_IMPL_ADD (array, i, strnlen, hwcap & PPC_FEATURE_HAS_VSX, + __strnlen_power7) + IFUNC_IMPL_ADD (array, i, strnlen, 1, + __strnlen_ppc)) + + /* Support sysdeps/powerpc/powerpc32/multiarch/strncmp.c. */ + IFUNC_IMPL (i, name, strncmp, + IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_HAS_VSX, + __strncmp_power7) + IFUNC_IMPL_ADD (array, i, strncmp, 1, + __strncmp_ppc)) +#endif + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memcmp.c. */ + IFUNC_IMPL (i, name, memcmp, + IFUNC_IMPL_ADD (array, i, memcmp, hwcap & PPC_FEATURE_HAS_VSX, + __memcmp_power7) + IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy.c. */ + IFUNC_IMPL (i, name, mempcpy, + IFUNC_IMPL_ADD (array, i, mempcpy, + hwcap & PPC_FEATURE_HAS_VSX, + __mempcpy_power7) + IFUNC_IMPL_ADD (array, i, mempcpy, 1, + __mempcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c. */ + IFUNC_IMPL (i, name, memchr, + IFUNC_IMPL_ADD (array, i, memchr, + hwcap & PPC_FEATURE_HAS_VSX, + __memchr_power7) + IFUNC_IMPL_ADD (array, i, memchr, 1, + __memchr_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memrchr.c. */ + IFUNC_IMPL (i, name, memrchr, + IFUNC_IMPL_ADD (array, i, memrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __memrchr_power7) + IFUNC_IMPL_ADD (array, i, memrchr, 1, + __memrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr.c. */ + IFUNC_IMPL (i, name, rawmemchr, + IFUNC_IMPL_ADD (array, i, rawmemchr, + hwcap & PPC_FEATURE_HAS_VSX, + __rawmemchr_power7) + IFUNC_IMPL_ADD (array, i, rawmemchr, 1, + __rawmemchr_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp.c. */ + IFUNC_IMPL (i, name, strcasecmp, + IFUNC_IMPL_ADD (array, i, strcasecmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strcasecmp_power7) + IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l.c. */ + IFUNC_IMPL (i, name, strcasecmp_l, + IFUNC_IMPL_ADD (array, i, strcasecmp_l, + hwcap & PPC_FEATURE_HAS_VSX, + __strcasecmp_l_power7) + IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, + __strcasecmp_l_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c. */ + IFUNC_IMPL (i, name, strncasecmp, + IFUNC_IMPL_ADD (array, i, strncasecmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strncasecmp_power7) + IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c. */ + IFUNC_IMPL (i, name, strncasecmp_l, + IFUNC_IMPL_ADD (array, i, strncasecmp_l, + hwcap & PPC_FEATURE_HAS_VSX, + __strncasecmp_l_power7) + IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, + __strncasecmp_l_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul.c. */ + IFUNC_IMPL (i, name, strchrnul, + IFUNC_IMPL_ADD (array, i, strchrnul, + hwcap & PPC_FEATURE_HAS_VSX, + __strchrnul_power7) + IFUNC_IMPL_ADD (array, i, strchrnul, 1, + __strchrnul_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strchr.c. */ + IFUNC_IMPL (i, name, strchr, + IFUNC_IMPL_ADD (array, i, strchr, + hwcap & PPC_FEATURE_HAS_VSX, + __strchr_power7) + IFUNC_IMPL_ADD (array, i, strchr, 1, + __strchr_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/wcschr.c. */ + IFUNC_IMPL (i, name, wcschr, + IFUNC_IMPL_ADD (array, i, wcschr, + hwcap & PPC_FEATURE_HAS_VSX, + __wcschr_power7) + IFUNC_IMPL_ADD (array, i, wcschr, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcschr_power6) + IFUNC_IMPL_ADD (array, i, wcschr, 1, + __wcschr_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr.c. */ + IFUNC_IMPL (i, name, wcsrchr, + IFUNC_IMPL_ADD (array, i, wcsrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __wcsrchr_power7) + IFUNC_IMPL_ADD (array, i, wcsrchr, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcsrchr_power6) + IFUNC_IMPL_ADD (array, i, wcsrchr, 1, + __wcsrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c. */ + IFUNC_IMPL (i, name, wcscpy, + IFUNC_IMPL_ADD (array, i, wcscpy, + hwcap & PPC_FEATURE_HAS_VSX, + __wcscpy_power7) + IFUNC_IMPL_ADD (array, i, wcscpy, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcscpy_power6) + IFUNC_IMPL_ADD (array, i, wcscpy, 1, + __wcscpy_ppc)) + + return i; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h new file mode 100644 index 0000000000..f2e6a4b705 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h @@ -0,0 +1,53 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <ldsodefs.h> + +/* The code checks if _rtld_global_ro was realocated before trying to access + the dl_hwcap field. The assembly is to make the compiler not optimize the + test (&_rtld_global_ro != NULL), which is always true in ISO C (but not + in that case since _rtld_global_ro might not been realocated yet). */ +#if defined(SHARED) && !IS_IN (rtld) +# define __GLRO(value) \ + ({ volatile void **__p = (volatile void**)(&_rtld_global_ro); \ + unsigned long int __ret; \ + asm ("# x in %0" : "+r" (__p)); \ + __ret = (__p) ? GLRO(value) : 0; \ + __ret; }) +#else +# define __GLRO(value) GLRO(value) +#endif + +/* dl_hwcap contains only the latest supported ISA, the macro checks which is + and fills the previous ones. */ +#define INIT_ARCH() \ + unsigned long int hwcap = __GLRO(dl_hwcap); \ + unsigned long int __attribute__((unused)) hwcap2 = __GLRO(dl_hwcap2); \ + if (hwcap & PPC_FEATURE_ARCH_2_06) \ + hwcap |= PPC_FEATURE_ARCH_2_05 | \ + PPC_FEATURE_POWER5_PLUS | \ + PPC_FEATURE_POWER5 | \ + PPC_FEATURE_POWER4; \ + else if (hwcap & PPC_FEATURE_ARCH_2_05) \ + hwcap |= PPC_FEATURE_POWER5_PLUS | \ + PPC_FEATURE_POWER5 | \ + PPC_FEATURE_POWER4; \ + else if (hwcap & PPC_FEATURE_POWER5_PLUS) \ + hwcap |= PPC_FEATURE_POWER5 | \ + PPC_FEATURE_POWER4; \ + else if (hwcap & PPC_FEATURE_POWER5) \ + hwcap |= PPC_FEATURE_POWER4; diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-power7.S new file mode 100644 index 0000000000..e7eb56a8fc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-power7.S @@ -0,0 +1,40 @@ +/* Optimized memchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__memchr_power7); \ + .type C_SYMBOL_NAME(__memchr_power7),@function; \ + C_LABEL(__memchr_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memchr_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/memchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c new file mode 100644 index 0000000000..1e4b88f9e9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c @@ -0,0 +1,34 @@ +/* PowerPC32 default implementation of memchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define MEMCHR __memchr_ppc + +#undef weak_alias +#define weak_alias(a, b) + +#ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__memchr_ppc, __GI_memchr, __memchr_ppc); +#endif + +extern __typeof (memchr) __memchr_ppc attribute_hidden; + +#include <string/memchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c new file mode 100644 index 0000000000..7eb4be7248 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c @@ -0,0 +1,41 @@ +/* Multiple versions of memchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# undef memchr +/* Redefine memchr so that the compiler won't make the weak_alias point + to internal hidden definition (__GI_memchr), since PPC32 does not + support local IFUNC calls. */ +# define memchr __redirect_memchr +# include <string.h> +# include "init-arch.h" + +extern __typeof (__redirect_memchr) __memchr_ppc attribute_hidden; +extern __typeof (__redirect_memchr) __memchr_power7 attribute_hidden; + +extern __typeof (__redirect_memchr) __libc_memchr; + +libc_ifunc (__libc_memchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memchr_power7 + : __memchr_ppc); +#undef memchr +weak_alias (__libc_memchr, memchr) +#else +#include <string/memchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-power7.S new file mode 100644 index 0000000000..e002aef057 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-power7.S @@ -0,0 +1,41 @@ +/* Optimized memcmp implementation for POWER7/PowerPC32. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcmp_power7); \ + .type C_SYMBOL_NAME(__memcmp_power7),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcmp_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcmp_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef weak_alias +#define weak_alias(a, b) + +#include <sysdeps/powerpc/powerpc32/power7/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-ppc32.S new file mode 100644 index 0000000000..dc1f21bcb5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-ppc32.S @@ -0,0 +1,45 @@ +/* Default memcmp implementation for PowerPC32. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if IS_IN (libc) +# undef EALIGN +# define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcmp_ppc); \ + .type C_SYMBOL_NAME(__memcmp_ppc),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcmp_ppc) \ + cfi_startproc; + +# undef END +# define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcmp_ppc) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_memcmp; __GI_memcmp = __memcmp_ppc + +# undef weak_alias +# define weak_alias(a, b) \ + .weak b ; b = __memcmp_ppc +#endif + +#include <sysdeps/powerpc/powerpc32/power4/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp.c new file mode 100644 index 0000000000..00bbcfaa4c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp.c @@ -0,0 +1,36 @@ +/* Multiple versions of memcmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# define memcmp __redirect_memcmp +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (memcmp) __memcmp_ppc attribute_hidden; +extern __typeof (memcmp) __memcmp_power7 attribute_hidden; +# undef memcmp + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_memcmp, memcmp, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memcmp_power7 + : __memcmp_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-a2.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-a2.S new file mode 100644 index 0000000000..17a31226c5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-a2.S @@ -0,0 +1,38 @@ +/* Optimized memcpy implementation for PowerPC A2. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcpy_a2); \ + .type C_SYMBOL_NAME(__memcpy_a2),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcpy_a2) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcpy_a2) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/a2/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-cell.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-cell.S new file mode 100644 index 0000000000..59859c6b94 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-cell.S @@ -0,0 +1,38 @@ +/* Optimized memcpy implementation for CELL BE PowerPC. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcpy_cell); \ + .type C_SYMBOL_NAME(__memcpy_cell),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcpy_cell) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcpy_cell) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/cell/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-power6.S new file mode 100644 index 0000000000..750151973b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-power6.S @@ -0,0 +1,38 @@ +/* Optimized memcpy implementation for PowerPC32 on POWER6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcpy_power6); \ + .type C_SYMBOL_NAME(__memcpy_power6),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcpy_power6) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcpy_power6) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power6/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-power7.S new file mode 100644 index 0000000000..3ac7c32084 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-power7.S @@ -0,0 +1,38 @@ +/* Optimized memcpy implementation for PowerPC32/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcpy_power7); \ + .type C_SYMBOL_NAME(__memcpy_power7),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcpy_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcpy_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-ppc32.S new file mode 100644 index 0000000000..f018684155 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-ppc32.S @@ -0,0 +1,41 @@ +/* Default memcpy implementation for PowerPC32. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# undef EALIGN +# define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcpy_ppc); \ + .type C_SYMBOL_NAME(__memcpy_ppc),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcpy_ppc) \ + cfi_startproc; + +# undef END +# define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcpy_ppc) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_memcpy; __GI_memcpy = __memcpy_ppc +#endif + +#include <sysdeps/powerpc/powerpc32/power4/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c new file mode 100644 index 0000000000..b414ba946b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c @@ -0,0 +1,48 @@ +/* Multiple versions of memcpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in lib and for + DSO. In static binaries we need memcpy before the initialization + happened. */ +#if defined SHARED && IS_IN (libc) +# undef memcpy +# define memcpy __redirect_memcpy +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (memcpy) __memcpy_cell attribute_hidden; +extern __typeof (memcpy) __memcpy_power6 attribute_hidden; +extern __typeof (memcpy) __memcpy_a2 attribute_hidden; +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; +# undef memcpy + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_memcpy, memcpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memcpy_power7 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __memcpy_a2 + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __memcpy_power6 + : (hwcap & PPC_FEATURE_CELL_BE) + ? __memcpy_cell + : __memcpy_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-power7.c new file mode 100644 index 0000000000..12902fec3f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-power7.c @@ -0,0 +1,41 @@ +/* Power7 multiarch memmove. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <memcopy.h> + +extern __typeof (_wordcopy_fwd_aligned) _wordcopy_fwd_aligned_power7; +extern __typeof (_wordcopy_fwd_dest_aligned) _wordcopy_fwd_dest_aligned_power7; +extern __typeof (_wordcopy_bwd_aligned) _wordcopy_bwd_aligned_power7; +extern __typeof (_wordcopy_bwd_dest_aligned) _wordcopy_bwd_dest_aligned_power7; + +#define _wordcopy_fwd_aligned _wordcopy_fwd_aligned_power7 +#define _wordcopy_fwd_dest_aligned _wordcopy_fwd_dest_aligned_power7 +#define _wordcopy_bwd_aligned _wordcopy_bwd_aligned_power7 +#define _wordcopy_bwd_dest_aligned _wordcopy_bwd_dest_aligned_power7 + +extern __typeof (memcpy) __memcpy_power7; +#define memcpy __memcpy_power7 + +extern __typeof (memmove) __memmove_power7; +#define MEMMOVE __memmove_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <string/memmove.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-ppc.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-ppc.c new file mode 100644 index 0000000000..59f298507c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-ppc.c @@ -0,0 +1,44 @@ +/* Power7 multiarch memmove. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <memcopy.h> + +extern __typeof (_wordcopy_fwd_aligned) _wordcopy_fwd_aligned_ppc; +extern __typeof (_wordcopy_fwd_dest_aligned) _wordcopy_fwd_dest_aligned_ppc; +extern __typeof (_wordcopy_bwd_aligned) _wordcopy_bwd_aligned_ppc; +extern __typeof (_wordcopy_bwd_dest_aligned) _wordcopy_bwd_dest_aligned_ppc; + +#define _wordcopy_fwd_aligned _wordcopy_fwd_aligned_ppc +#define _wordcopy_fwd_dest_aligned _wordcopy_fwd_dest_aligned_ppc +#define _wordcopy_bwd_aligned _wordcopy_bwd_aligned_ppc +#define _wordcopy_bwd_dest_aligned _wordcopy_bwd_dest_aligned_ppc + +extern __typeof (memcpy) __memcpy_ppc; +#define memcpy __memcpy_ppc + +extern __typeof (memmove) __memmove_ppc; +#define MEMMOVE __memmove_ppc + +#if defined SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__memmove_ppc, __GI_memmove, __memmove_ppc); +#endif + +#include <string/memmove.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c new file mode 100644 index 0000000000..481139fae8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c @@ -0,0 +1,36 @@ +/* Multiple versions of memmove. PowerPC32 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +/* Redefine memmove so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# define memmove __redirect_memmove +# include <string.h> +# include "init-arch.h" + +extern __typeof (memmove) __memmove_ppc attribute_hidden; +extern __typeof (memmove) __memmove_power7 attribute_hidden; +# undef memmove + +libc_ifunc_redirected (__redirect_memmove, memmove, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memmove_power7 + : __memmove_ppc); +#else +# include <string/memmove.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-power7.S new file mode 100644 index 0000000000..a1a078dec6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-power7.S @@ -0,0 +1,35 @@ +/* Optimized mempcpy implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__mempcpy_power7); \ + .type C_SYMBOL_NAME(__mempcpy_power7),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__mempcpy_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__mempcpy_power7) + +#include <sysdeps/powerpc/powerpc32/power7/mempcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-ppc32.c new file mode 100644 index 0000000000..2a20060e5b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-ppc32.c @@ -0,0 +1,32 @@ +/* PowerPC32 default implementation of mempcpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define MEMPCPY __mempcpy_ppc + +#undef libc_hidden_def +#define libc_hidden_def(name) +#undef weak_alias +#define weak_alias(a, b) + +#if defined SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__mempcpy_ppc, __GI_mempcpy, __mempcpy_ppc); +#endif + +#include <string/mempcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy.c new file mode 100644 index 0000000000..0c7250a4bf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy.c @@ -0,0 +1,44 @@ +/* Multiple versions of mempcpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define mempcpy __redirect_mempcpy +# define __mempcpy __redirect___mempcpy +# define NO_MEMPCPY_STPCPY_REDIRECT +/* Omit the mempcpy inline definitions because it would redefine mempcpy. */ +# define _HAVE_STRING_ARCH_mempcpy 1 +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__mempcpy) __mempcpy_ppc attribute_hidden; +extern __typeof (__mempcpy) __mempcpy_power7 attribute_hidden; +# undef mempcpy +# undef __mempcpy + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect___mempcpy, __mempcpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __mempcpy_power7 + : __mempcpy_ppc); + +weak_alias (__mempcpy, mempcpy) +#else +# include <string/mempcpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-power7.S new file mode 100644 index 0000000000..4c3f6af9f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-power7.S @@ -0,0 +1,40 @@ +/* Optimized memrchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__memrchr_power7); \ + .type C_SYMBOL_NAME(__memrchr_power7),@function; \ + C_LABEL(__memrchr_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memrchr_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/memrchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c new file mode 100644 index 0000000000..a0247f49c8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c @@ -0,0 +1,25 @@ +/* PowerPC32 default implementation of memrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define MEMRCHR __memrchr_ppc +# include <string.h> +extern void *__memrchr_ppc (const void *, int, size_t); +#endif + +#include <string/memrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr.c new file mode 100644 index 0000000000..fb09fdf89c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr.c @@ -0,0 +1,37 @@ +/* Multiple versions of memrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__memrchr) __memrchr_ppc attribute_hidden; +extern __typeof (__memrchr) __memrchr_power7 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__memrchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memrchr_power7 + : __memrchr_ppc); + +weak_alias (__memrchr, memrchr) +#else +#include <string/memrchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-power6.S new file mode 100644 index 0000000000..55ff437a20 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-power6.S @@ -0,0 +1,38 @@ +/* Optimized 32-bit memset implementation for POWER6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memset_power6); \ + .type C_SYMBOL_NAME(__memset_power6),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memset_power6) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memset_power6) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power6/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-power7.S new file mode 100644 index 0000000000..ced4cb015b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-power7.S @@ -0,0 +1,38 @@ +/* Optimized memset implementation for PowerPC32/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memset_power7); \ + .type C_SYMBOL_NAME(__memset_power7),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memset_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memset_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S new file mode 100644 index 0000000000..63cd5b4eea --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S @@ -0,0 +1,41 @@ +/* Default memset implementation for PowerPC32. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# undef EALIGN +# define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memset_ppc); \ + .type C_SYMBOL_NAME(__memset_ppc),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memset_ppc) \ + cfi_startproc; + +# undef END +# define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memset_ppc) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_memset; __GI_memset = __memset_ppc +#endif + +#include <sysdeps/powerpc/powerpc32/power4/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset.c new file mode 100644 index 0000000000..afcca12c78 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset.c @@ -0,0 +1,39 @@ +/* Multiple versions of memset. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +# define memset __redirect_memset +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (memset) __memset_ppc attribute_hidden; +extern __typeof (memset) __memset_power6 attribute_hidden; +extern __typeof (memset) __memset_power7 attribute_hidden; +# undef memset + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_memset, memset, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memset_power7 + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __memset_power6 + : __memset_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-power7.S new file mode 100644 index 0000000000..e088c6b046 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-power7.S @@ -0,0 +1,40 @@ +/* Optimized rawrawmemchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__rawmemchr_power7); \ + .type C_SYMBOL_NAME(__rawmemchr_power7),@function; \ + C_LABEL(__rawmemchr_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__rawmemchr_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/rawmemchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-ppc32.c new file mode 100644 index 0000000000..bce76cbe75 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-ppc32.c @@ -0,0 +1,32 @@ +/* PowerPC32 default implementation of rawmemchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define RAWMEMCHR __rawmemchr_ppc +#undef weak_alias +#define weak_alias(a, b) +#ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__rawmemchr_ppc, __GI___rawmemchr, __rawmemchr_ppc); +#endif + +extern __typeof (rawmemchr) __rawmemchr_ppc attribute_hidden; + +#include <string/rawmemchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr.c new file mode 100644 index 0000000000..6ea56db0af --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr.c @@ -0,0 +1,38 @@ +/* Multiple versions of rawmemchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define __rawmemchr __redirect___rawmemchr +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__rawmemchr) __rawmemchr_ppc attribute_hidden; +extern __typeof (__rawmemchr) __rawmemchr_power7 attribute_hidden; +# undef __rawmemchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect___rawmemchr, __rawmemchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __rawmemchr_power7 + : __rawmemchr_ppc); +weak_alias (__rawmemchr, rawmemchr) +#else +#include <string/rawmemchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-memcmp.S new file mode 100644 index 0000000000..b676dd147e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-memcmp.S @@ -0,0 +1,19 @@ +/* Loader memcmp implementation for PowerPC32. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-memset.S new file mode 100644 index 0000000000..b9eb81328f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-memset.S @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-strchr.S new file mode 100644 index 0000000000..5d197557af --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-strchr.S @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-strnlen.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-strnlen.c new file mode 100644 index 0000000000..79704aa2d9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-strnlen.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string/strnlen.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp-power7.S new file mode 100644 index 0000000000..f9324a972e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp-power7.S @@ -0,0 +1,39 @@ +/* Optimized strcasecmp implementation for PowerPC32. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strcasecmp_power7); \ + .type C_SYMBOL_NAME(__strcasecmp_power7),@function; \ + C_LABEL(__strcasecmp_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strcasecmp_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp.c new file mode 100644 index 0000000000..da7d8415d2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp.c @@ -0,0 +1,41 @@ +/* Multiple versions of strcasecmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# define strcasecmp __strcasecmp_ppc + +extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; +extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; +#endif + +#include <string/strcasecmp.c> +#undef strcasecmp + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strcasecmp) __libc_strcasecmp; +libc_ifunc (__libc_strcasecmp, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcasecmp_power7 + : __strcasecmp_ppc); + +weak_alias (__libc_strcasecmp, strcasecmp) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l-power7.S new file mode 100644 index 0000000000..66e0584139 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l-power7.S @@ -0,0 +1,41 @@ +/* Default strcasecmp implementation for PowerPC32. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strcasecmp_l_power7); \ + .type C_SYMBOL_NAME(__strcasecmp_l_power7),@function; \ + C_LABEL(__strcasecmp_l_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strcasecmp_l_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define USE_IN_EXTENDED_LOCALE_MODEL + +#include <sysdeps/powerpc/powerpc32/power7/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l.c new file mode 100644 index 0000000000..85411f5558 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l.c @@ -0,0 +1,41 @@ +/* Multiple versions of strcasecmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# define strcasecmp_l __strcasecmp_l_ppc + +extern __typeof (__strcasecmp_l) __strcasecmp_l_ppc attribute_hidden; +extern __typeof (__strcasecmp_l) __strcasecmp_l_power7 attribute_hidden; +#endif + +#include <string/strcasecmp_l.c> +#undef strcasecmp_l + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strcasecmp_l) __libc_strcasecmp_l; +libc_ifunc (__libc_strcasecmp_l, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcasecmp_l_power7 + : __strcasecmp_l_ppc); + +weak_alias (__libc_strcasecmp_l, strcasecmp_l) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr-power7.S new file mode 100644 index 0000000000..7624a27bbd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr-power7.S @@ -0,0 +1,39 @@ +/* Optimized strchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strchr_power7); \ + .type C_SYMBOL_NAME(__strchr_power7),@function; \ + C_LABEL(__strchr_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strchr_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr-ppc32.S new file mode 100644 index 0000000000..7dcd55af59 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr-ppc32.S @@ -0,0 +1,41 @@ +/* PowerPC32 default implementation of strchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifdef SHARED +# undef ENTRY +# define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strchr_ppc); \ + .type C_SYMBOL_NAME(__strchr_ppc),@function; \ + .align ALIGNARG(2); \ + C_LABEL(__strchr_ppc) \ + cfi_startproc; \ + CALL_MCOUNT + +# undef END +# define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strchr_ppc) \ + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strchr; __GI_strchr = __strchr_ppc +#endif + +#include <sysdeps/powerpc/powerpc32/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr.c new file mode 100644 index 0000000000..712bc1a4b8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr.c @@ -0,0 +1,39 @@ +/* Multiple versions of strchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +# define strchr __redirect_strchr +/* Omit the strchr inline definitions because it would redefine strchr. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strchr) __strchr_ppc attribute_hidden; +extern __typeof (strchr) __strchr_power7 attribute_hidden; +# undef strchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strchr, strchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strchr_power7 + : __strchr_ppc); +weak_alias (strchr, index) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-power7.S new file mode 100644 index 0000000000..3baad50818 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-power7.S @@ -0,0 +1,39 @@ +/* Optimized strchrnul implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strchrnul_power7); \ + .type C_SYMBOL_NAME(__strchrnul_power7),@function; \ + C_LABEL(__strchrnul_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strchrnul_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/strchrnul.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-ppc32.c new file mode 100644 index 0000000000..c981eb67f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-ppc32.c @@ -0,0 +1,28 @@ +/* PowerPC32 default implementation of strchrnul. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRCHRNUL __strchrnul_ppc + +#undef weak_alias +#define weak_alias(a,b ) + +extern __typeof (strchrnul) __strchrnul_ppc attribute_hidden; + +#include <string/strchrnul.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul.c new file mode 100644 index 0000000000..b8f853d8e4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul.c @@ -0,0 +1,37 @@ +/* Multiple versions of strchrnul. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strchrnul) __strchrnul_ppc attribute_hidden; +extern __typeof (__strchrnul) __strchrnul_power7 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__strchrnul, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strchrnul_power7 + : __strchrnul_ppc); + +weak_alias (__strchrnul, strchrnul) +#else +#include <string/strchrnul.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen-power7.S new file mode 100644 index 0000000000..7681b827d6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen-power7.S @@ -0,0 +1,36 @@ +/* Optimized strlen implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strlen_power7); \ + .type C_SYMBOL_NAME(__strlen_power7),@function; \ + C_LABEL(__strlen_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strlen_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen-ppc32.S new file mode 100644 index 0000000000..b665977e17 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen-ppc32.S @@ -0,0 +1,41 @@ +/* Default strlen implementation for PowerPC32. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) + +#include <sysdep.h> + +# undef ENTRY +# define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strlen_ppc); \ + .type C_SYMBOL_NAME(__strlen_ppc),@function; \ + C_LABEL(__strlen_ppc) \ + cfi_startproc; + +# undef END +# define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strlen_ppc) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strlen; __GI_strlen = __strlen_ppc + +#endif + +#include <sysdeps/powerpc/powerpc32/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen.c new file mode 100644 index 0000000000..c13940e999 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen.c @@ -0,0 +1,33 @@ +/* Multiple versions of strlen. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +# define strlen __redirect_strlen +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strlen) __strlen_ppc attribute_hidden; +extern __typeof (strlen) __strlen_power7 attribute_hidden; +# undef strlen + +libc_ifunc_redirected (__redirect_strlen, strlen, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strlen_power7 + : __strlen_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase-power7.c new file mode 100644 index 0000000000..a49bed9278 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase-power7.c @@ -0,0 +1,26 @@ +/* Optimized strcasecmp_l implememtation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + + +#include <string.h> + +#define __strncasecmp __strncasecmp_power7 + +extern __typeof (strncasecmp) __strncasecmp_power7 attribute_hidden; + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c new file mode 100644 index 0000000000..089faa9853 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c @@ -0,0 +1,41 @@ +/* Multiple versions of strncasecmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# define strncasecmp __strncasecmp_ppc +extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; +extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; +#endif + +#include <string/strncase.c> +#undef strncasecmp + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__strncasecmp) __libc_strncasecmp; +libc_ifunc (__libc_strncasecmp, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncasecmp_power7 + : __strncasecmp_ppc); +weak_alias (__libc_strncasecmp, strncasecmp) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l-power7.c new file mode 100644 index 0000000000..80f7d48133 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l-power7.c @@ -0,0 +1,26 @@ +/* Optimized strcasecmp_l implememtation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define __strncasecmp_l __strncasecmp_l_power7 +#define USE_IN_EXTENDED_LOCALE_MODEL 1 + +extern __typeof (strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c new file mode 100644 index 0000000000..c988c8dd3f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c @@ -0,0 +1,42 @@ +/* Multiple versions of strncasecmp_l. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# define strncasecmp_l __strncasecmp_l_ppc +extern __typeof (__strncasecmp_l) __strncasecmp_l_ppc attribute_hidden; +extern __typeof (__strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; +#endif + +#include <string/strncase_l.c> +#undef strncasecmp_l + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__strncasecmp_l) __libc_strncasecmp_l; +libc_ifunc (__libc_strncasecmp_l, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncasecmp_l_power7 + : __strncasecmp_l_ppc); + +weak_alias (__libc_strncasecmp_l, strncasecmp_l) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-power7.S new file mode 100644 index 0000000000..cbe969a5a4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-power7.S @@ -0,0 +1,38 @@ +/* Optimized strcmp implementation for POWER7/PowerPC32. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__strncmp_power7); \ + .type C_SYMBOL_NAME(__strncmp_power7),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__strncmp_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strncmp_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-ppc32.S new file mode 100644 index 0000000000..2f5d2d3651 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-ppc32.S @@ -0,0 +1,40 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# undef EALIGN +# define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__strncmp_ppc); \ + .type C_SYMBOL_NAME(__strncmp_ppc),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__strncmp_ppc) \ + cfi_startproc; + +# undef END +# define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strncmp_ppc) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strncmp; __GI_strncmp = __strncmp_ppc +#endif + +#include <sysdeps/powerpc/powerpc32/power4/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp.c new file mode 100644 index 0000000000..bb4e892df8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp.c @@ -0,0 +1,39 @@ +/* Multiple versions of strncmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +# define strncmp __redirect_strncmp +/* Omit the strncmp inline definitions because it would redefine strncmp. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncmp) __strncmp_ppc attribute_hidden; +extern __typeof (strncmp) __strncmp_power4 attribute_hidden; +extern __typeof (strncmp) __strncmp_power7 attribute_hidden; +# undef strncmp + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strncmp, strncmp, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncmp_power7 + : __strncmp_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-power7.S new file mode 100644 index 0000000000..3f5a32f0c6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-power7.S @@ -0,0 +1,40 @@ +/* Optimized strnlen implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strnlen_power7); \ + .type C_SYMBOL_NAME(__strnlen_power7),@function; \ + C_LABEL(__strnlen_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strnlen_power7) + +#undef libc_hidden_def +#define libc_hidden_def(name) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/strnlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c new file mode 100644 index 0000000000..b41fc9d359 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c @@ -0,0 +1,28 @@ +/* Default strnlen implementation for PowerPC32. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define STRNLEN __strnlen_ppc +#ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__strnlen_ppc, __GI_strnlen, __strnlen_ppc); \ + strong_alias (__strnlen_ppc, __strnlen_ppc_1); \ + __hidden_ver1 (__strnlen_ppc_1, __GI___strnlen, __strnlen_ppc_1); +#endif + +#include <string/strnlen.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen.c new file mode 100644 index 0000000000..f2883e69eb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen.c @@ -0,0 +1,36 @@ +/* Multiple versions of strnlen. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define strnlen __redirect_strnlen +# define __strnlen __redirect___strnlen +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strnlen) __strnlen_ppc attribute_hidden; +extern __typeof (__strnlen) __strnlen_power7 attribute_hidden; +# undef strnlen +# undef __strnlen + +libc_ifunc_redirected (__redirect___strnlen, __strnlen, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strnlen_power7 + : __strnlen_ppc); +weak_alias (__strnlen, strnlen) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power6.c new file mode 100644 index 0000000000..6610b5ef82 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power6.c @@ -0,0 +1,26 @@ +/* wcschr.c - Wide Character Search for powerpc32/power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#define WCSCHR __wcschr_power6 + +#undef libc_hidden_def +#define libc_hidden_def(name) + +#include <sysdeps/powerpc/power6/wcschr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power7.c new file mode 100644 index 0000000000..7e22c441ac --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power7.c @@ -0,0 +1,26 @@ +/* wcschr.c - Wide Character Search for powerpc32/power7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#define WCSCHR __wcschr_power7 + +#undef libc_hidden_def +#define libc_hidden_def(name) + +#include <sysdeps/powerpc/power6/wcschr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-ppc32.c new file mode 100644 index 0000000000..777ec080b2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-ppc32.c @@ -0,0 +1,43 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#if IS_IN (libc) +# undef libc_hidden_weak +# define libc_hidden_weak(name) + +# undef weak_alias +# undef libc_hidden_def + +# ifdef SHARED +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcschr_ppc, __GI_wcschr, __wcschr_ppc); \ + strong_alias (__wcschr_ppc, __wcschr_ppc_1); \ + __hidden_ver1 (__wcschr_ppc_1, __GI___wcschr, __wcschr_ppc_1); +# define weak_alias(name,alias) +# else +# define weak_alias(name, alias) \ + _weak_alias(__wcschr_ppc, __wcschr) +# define libc_hidden_def(name) +# endif /* SHARED */ +#endif + +extern __typeof (wcschr) __wcschr_ppc; + +#define WCSCHR __wcschr_ppc +#include <wcsmbs/wcschr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr.c new file mode 100644 index 0000000000..059665f1b1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr.c @@ -0,0 +1,41 @@ +/* Multiple versions of wcschr + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define wcschr __redirect_wcschr +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__redirect_wcschr) __wcschr_ppc attribute_hidden; +extern __typeof (__redirect_wcschr) __wcschr_power6 attribute_hidden; +extern __typeof (__redirect_wcschr) __wcschr_power7 attribute_hidden; + +extern __typeof (__redirect_wcschr) __libc_wcschr; + +libc_ifunc (__libc_wcschr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcschr_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcschr_power6 + : __wcschr_ppc); +#undef wcschr +weak_alias (__libc_wcschr, wcschr) +#else +#include <wcsmbs/wcschr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power6.c new file mode 100644 index 0000000000..8e732fc80c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power6.c @@ -0,0 +1,22 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#define WCSCPY __wcscpy_power6 + +#include <sysdeps/powerpc/power6/wcscpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power7.c new file mode 100644 index 0000000000..dece1024f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power7.c @@ -0,0 +1,22 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#define WCSCPY __wcscpy_power7 + +#include <sysdeps/powerpc/power6/wcscpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c new file mode 100644 index 0000000000..b48ff54d92 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c @@ -0,0 +1,26 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#if IS_IN (libc) +# define WCSCPY __wcscpy_ppc +#endif + +extern __typeof (wcscpy) __wcscpy_ppc; + +#include <wcsmbs/wcscpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c new file mode 100644 index 0000000000..a59e794f03 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c @@ -0,0 +1,36 @@ +/* Multiple versions of wcscpy + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcscpy) __wcscpy_ppc attribute_hidden; +extern __typeof (wcscpy) __wcscpy_power6 attribute_hidden; +extern __typeof (wcscpy) __wcscpy_power7 attribute_hidden; + +libc_ifunc (wcscpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcscpy_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcscpy_power6 + : __wcscpy_ppc); +#else +#include <wcsmbs/wcscpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power6.c new file mode 100644 index 0000000000..0391e12442 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power6.c @@ -0,0 +1,20 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define WCSRCHR __wcsrchr_power6 + +#include <sysdeps/powerpc/power6/wcsrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power7.c new file mode 100644 index 0000000000..1167a75734 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power7.c @@ -0,0 +1,20 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define WCSRCHR __wcsrchr_power7 + +#include <sysdeps/powerpc/power6/wcsrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-ppc32.c new file mode 100644 index 0000000000..1c8e12eefb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-ppc32.c @@ -0,0 +1,26 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#if IS_IN (libc) +# define WCSRCHR __wcsrchr_ppc +#endif + +extern __typeof (wcsrchr) __wcsrchr_ppc; + +#include <wcsmbs/wcsrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr.c new file mode 100644 index 0000000000..10820443b4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr.c @@ -0,0 +1,36 @@ +/* Multiple versions of wcsrchr + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcsrchr) __wcsrchr_ppc attribute_hidden; +extern __typeof (wcsrchr) __wcsrchr_power6 attribute_hidden; +extern __typeof (wcsrchr) __wcsrchr_power7 attribute_hidden; + +libc_ifunc (wcsrchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcsrchr_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcsrchr_power6 + : __wcsrchr_ppc); +#else +#include <wcsmbs/wcsrchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-power7.c new file mode 100644 index 0000000000..d2095d85ac --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-power7.c @@ -0,0 +1,23 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned_power7 +#define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned_power7 +#define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned_power7 +#define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned_power7 + +#include <sysdeps/powerpc/power6/wordcopy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-ppc32.c new file mode 100644 index 0000000000..ecdc2fa73d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-ppc32.c @@ -0,0 +1,27 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned_ppc +# define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned_ppc +# define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned_ppc +# define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned_ppc + +# include <sysdeps/powerpc/power4/wordcopy.c> +#else +# include <string/wordcopy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/strncmp.S new file mode 100644 index 0000000000..42a67e7e8a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/strncmp.S @@ -0,0 +1,196 @@ +/* Optimized strcmp implementation for PowerPC32. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ + +EALIGN (strncmp, 4, 0) + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r10 +#define rWORD4 r11 +#define rFEFE r8 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 + lis r7F7F, 0x7f7f + dcbt 0,rSTR2 + clrlwi. rTMP, rTMP, 30 + cmplwi cr1, rN, 0 + lis rFEFE, -0x101 + bne L(unaligned) +/* We are word aligned so set up for two loops. first a word + loop, then fall into the byte loop if any residual. */ + srwi. rTMP, rN, 2 + clrlwi rN, rN, 30 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + cmplwi cr1, rN, 0 + beq L(unaligned) + + mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */ + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) + b L(g1) + +L(g0): + lwzu rWORD1, 4(rSTR1) + bne- cr1, L(different) + lwzu rWORD2, 4(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + bdz L(tail) + and. rTMP, rTMP, rNEG + cmpw cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + +#ifdef __LITTLE_ENDIAN__ +L(endstring): + slwi rTMP, rTMP, 1 + addi rTMP2, rTMP, -1 + andc rTMP2, rTMP2, rTMP + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rldimi rTMP2, rWORD2, 24, 32 + rldimi rTMP, rWORD1, 24, 32 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr+ + ori rRTN, rTMP2, 1 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rldimi rTMP2, rWORD2, 24, 32 + rldimi rTMP, rWORD1, 24, 32 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr+ + ori rRTN, rTMP2, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzw rBITDIF, rBITDIF + cntlzw rNEG, rNEG + addi rNEG, rNEG, 7 + cmpw cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + bgelr+ cr1 +L(equal): + li rRTN, 0 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + bgelr+ +L(highbit): + ori rRTN, rWORD2, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. rTMP, rTMP, rNEG + cmpw cr1, rWORD1, rWORD2 + bne- L(endstring) + addi rSTR1, rSTR1, 4 + bne- cr1, L(different) + addi rSTR2, rSTR2, 4 + cmplwi cr1, rN, 0 +L(unaligned): + mtctr rN /* Power4 wants mtctr 1st in dispatch group */ + ble cr1, L(ux) +L(uz): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + .align 4 +L(u1): + cmpwi cr1, rWORD1, 0 + bdz L(u4) + cmpw rWORD1, rWORD2 + beq- cr1, L(u4) + bne- L(u4) + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + cmpwi cr1, rWORD3, 0 + bdz L(u3) + cmpw rWORD3, rWORD4 + beq- cr1, L(u3) + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + cmpwi cr1, rWORD1, 0 + bdz L(u4) + cmpw rWORD1, rWORD2 + beq- cr1, L(u4) + bne- L(u4) + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + cmpwi cr1, rWORD3, 0 + bdz L(u3) + cmpw rWORD3, rWORD4 + beq- cr1, L(u3) + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + b L(u1) + +L(u3): sub rRTN, rWORD3, rWORD4 + blr +L(u4): sub rRTN, rWORD1, rWORD2 + blr +L(ux): + li rRTN, 0 + blr +END (strncmp) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/Implies new file mode 100644 index 0000000000..02d222d22a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/Implies @@ -0,0 +1,4 @@ +powerpc/power5+/fpu +powerpc/power5+ +powerpc/powerpc32/power5/fpu +powerpc/powerpc32/power5 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/multiarch/Implies new file mode 100644 index 0000000000..76a985188e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power5/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S new file mode 100644 index 0000000000..efe7be8242 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S @@ -0,0 +1,36 @@ +/* ceil function. PowerPC32/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__ceil, 4, 0) + frip fp1, fp1 + blr + END (__ceil) + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +weak_alias (__ceil, ceill) +strong_alias (__ceil, __ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S new file mode 100644 index 0000000000..cff058e7ef --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S @@ -0,0 +1,29 @@ +/* ceilf function. PowerPC32/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__ceilf, 4, 0) + frip fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set ooverflow for float. */ + blr + END (__ceilf) + +weak_alias (__ceilf, ceilf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S new file mode 100644 index 0000000000..9f040d8457 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S @@ -0,0 +1,36 @@ +/* floor function. PowerPC32/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__floor, 4, 0) + frim fp1, fp1 + blr + END (__floor) + +weak_alias (__floor, floor) + +#ifdef NO_LONG_DOUBLE +weak_alias (__floor, floorl) +strong_alias (__floor, __floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S new file mode 100644 index 0000000000..b84e4c64fb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S @@ -0,0 +1,29 @@ +/* floorf function. PowerPC32/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__floorf, 4, 0) + frim fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set ooverflow for float. */ + blr + END (__floorf) + +weak_alias (__floorf, floorf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S new file mode 100644 index 0000000000..adbc7ebe18 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S @@ -0,0 +1,59 @@ +/* lround function. POWER5+, PowerPC32 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long [r3] llround (float x [fp1]) + IEEE 1003.1 lround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we pre-round using the V2.02 Floating Round to Integer Nearest + instruction before we use the Floating Convert to Integer Word with + round to zero instruction. */ + + .machine "power5" +ENTRY (__llround) + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + frin fp2,fp1 + fctidz fp3,fp2 /* Convert To Integer Word lround toward 0. */ + stfd fp3,8(r1) + nop /* Ensure the following load is in a different dispatch */ + nop /* group to avoid pipe stall on POWER4&5. */ + nop + lwz r3,8+HIWORD(r1) + lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llround) + +weak_alias (__llround, llround) + +strong_alias (__llround, __llroundf) +weak_alias (__llround, llroundf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S new file mode 100644 index 0000000000..030d2fdff8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S @@ -0,0 +1 @@ +/* __llroundf is in s_llround.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S new file mode 100644 index 0000000000..f61846331d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S @@ -0,0 +1,57 @@ +/* lround function. POWER5+, PowerPC32 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long [r3] lround (float x [fp1]) + IEEE 1003.1 lround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we pre-round using the V2.02 Floating Round to Integer Nearest + instruction before we use the Floating Convert to Integer Word with + round to zero instruction. */ + + .machine "power5" +ENTRY (__lround) + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + frin fp2,fp1 + fctiwz fp3,fp2 /* Convert To Integer Word lround toward 0. */ + stfd fp3,8(r1) + nop /* Ensure the following load is in a different dispatch */ + nop /* group to avoid pipe stall on POWER4&5. */ + nop + lwz r3,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__lround) + +weak_alias (__lround, lround) + +strong_alias (__lround, __lroundf) +weak_alias (__lround, lroundf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S new file mode 100644 index 0000000000..91b42352f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S @@ -0,0 +1,36 @@ +/* round function. PowerPC32/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__round, 4, 0) + frin fp1, fp1 + blr + END (__round) + +weak_alias (__round, round) + +#ifdef NO_LONG_DOUBLE +weak_alias (__round, roundl) +strong_alias (__round, __roundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __round, roundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S new file mode 100644 index 0000000000..4e0c7e5cec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S @@ -0,0 +1,29 @@ +/* roundf function. PowerPC32/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__roundf, 4, 0) + frin fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set ooverflow for float. */ + blr + END (__roundf) + +weak_alias (__roundf, roundf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S new file mode 100644 index 0000000000..ceca529826 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S @@ -0,0 +1,36 @@ +/* trunc function. PowerPC32/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__trunc, 4, 0) + friz fp1, fp1 + blr + END (__trunc) + +weak_alias (__trunc, trunc) + +#ifdef NO_LONG_DOUBLE +weak_alias (__trunc, truncl) +strong_alias (__trunc, __truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S new file mode 100644 index 0000000000..60be314c28 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S @@ -0,0 +1,29 @@ +/* truncf function. PowerPC32/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__truncf, 4, 0) + friz fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set ooverflow for float. */ + blr + END (__truncf) + +weak_alias (__truncf, truncf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/multiarch/Implies new file mode 100644 index 0000000000..54b3931625 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power5/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/Implies new file mode 100644 index 0000000000..17139bf21c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power4/fpu +powerpc/powerpc32/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/multiarch/Implies new file mode 100644 index 0000000000..c6c090a60e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power4/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S new file mode 100644 index 0000000000..09a2fe3865 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S @@ -0,0 +1,61 @@ +/* isnan(). PowerPC32 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power5 +EALIGN (__isnan, 4, 0) + stwu r1,-32(r1) + cfi_adjust_cfa_offset (32) + ori r1,r1,0 + stfd fp1,24(r1) /* copy FPR to GPR */ + ori r1,r1,0 + lwz r4,24+HIWORD(r1) + lwz r5,24+LOWORD(r1) + lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */ + clrlwi r4,r4,1 /* x = fabs(x) */ + cmpw cr7,r4,r0 /* if (fabs(x) =< inf) */ + cmpwi cr6,r5,0 + li r3,0 /* then return 0 */ + addi r1,r1,32 + cfi_adjust_cfa_offset (-32) + bltlr+ cr7 + bgt- cr7,L(NaN) + beqlr+ cr6 +L(NaN): + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/s_isnanf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/s_isnanf.S new file mode 100644 index 0000000000..7948f52e84 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/s_isnanf.S @@ -0,0 +1,45 @@ +/* isnan(). PowerPC32 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnanf(x) */ + .machine power5 +EALIGN (__isnanf, 4, 0) + stwu r1,-32(r1) + cfi_adjust_cfa_offset (32) + stfs fp1,28(r1) /* copy FPR to GPR */ + nop + nop + lwz r4,28(r1) + lis r0,0x7f80 /* const long r0 0x7f800000 */ + clrlwi r4,r4,1 /* x = fabs(x) */ + cmpw cr7,r4,r0 /* if (fabs(x) =< inf) */ + li r3,0 /* then return 0 */ + addi r1,r1,32 + cfi_adjust_cfa_offset (-32) + blelr+ cr7 +L(NaN): + li r3,1 /* else return 1 */ + blr + END (__isnanf) + +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt_compat.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt_compat.S new file mode 100644 index 0000000000..93625c5aa9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt_compat.S @@ -0,0 +1,106 @@ +/* sqrt function. PowerPC32 version. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* double [fp1] sqrt (double x [fp1]) + Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). + The fsqrt instruction generates the correct value for all inputs and + sets the appropriate floating point exceptions. Extended checking is + only needed to set errno (via __kernel_standard) if the input value + is negative. + + So compare the input value against the absolute value of itself. + This will compare equal unless the value is negative (EDOM) or a NAN, + in which case we branch to the extend wrapper. If equal we can return + the result directly. + + This part of the function looks like a leaf routine, so no need to + stack a frame or execute prologue/epilogue code. It is safe to + branch directly to w_sqrt as long as the input value (f1) is + preserved. Putting the sqrt result into f2 (float parameter 2) + allows passing both the input value and sqrt result into the extended + wrapper so there is no need to recompute. + + This tactic avoids the overhead of stacking a frame for the normal + (non-error) case. Until gcc supports prologue shrink-wrapping + this is the best we can do. */ + + .section ".text" + .machine power4 +EALIGN (__sqrt, 5, 0) + fabs fp0,fp1 + fsqrt fp2,fp1 + fcmpu cr1,fp0,fp1 + bne- cr1,.Lw_sqrt + fmr fp1,fp2 + blr + .align 4 +.Lw_sqrt: + mflr r0 + stwu r1,-16(r1) + cfi_adjust_cfa_offset(16) + fmr fp12,fp2 + stw r0,20(r1) + stw r30,8(r1) + cfi_offset(lr,20-16) + cfi_offset(r30,8-16) +#ifdef SHARED + SETUP_GOT_ACCESS(r30,got_label) + addis r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +#else + lis r9,_LIB_VERSION@ha + lwz r0,_LIB_VERSION@l(r9) +#endif +/* if (_LIB_VERSION == _IEEE_) return z; */ + cmpwi cr7,r0,-1 + beq- cr7,.L4 +/* if (x != x) return z; !isnan*/ + fcmpu cr7,fp1,fp1 + bne- cr7,.L4 +/* if (x < 0.0) + return __kernel_standard (x, x, 26) */ + fmr fp2,fp1 + li r3,26 + bne- cr1,.L11 +.L4: + lwz r0,20(r1) + fmr fp1,fp12 + lwz r30,8(r1) + addi r1,r1,16 + mtlr r0 + blr +.L11: + bl __kernel_standard@plt + fmr fp12,fp1 + b .L4 + END (__sqrt) + +weak_alias (__sqrt, sqrt) + +#ifdef NO_LONG_DOUBLE +weak_alias (__sqrt, sqrtl) +strong_alias (__sqrt, __sqrtl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf_compat.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf_compat.S new file mode 100644 index 0000000000..2ca86b6155 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf_compat.S @@ -0,0 +1,98 @@ +/* sqrtf function. PowerPC32 version. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* float [fp1] sqrts (float x [fp1]) + Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). + The fsqrts instruction generates the correct value for all inputs and + sets the appropriate floating point exceptions. Extended checking is + only needed to set errno (via __kernel_standard) if the input value + is negative. + + So compare the input value against the absolute value of itself. + This will compare equal unless the value is negative (EDOM) or a NAN, + in which case we branch to the extend wrapper. If equal we can return + the result directly. + + This part of the function looks like a leaf routine, so no need to + stack a frame or execute prologue/epilogue code. It is safe to + branch directly to w_sqrt as long as the input value (f1) is + preserved. Putting the sqrt result into f2 (float parameter 2) + allows passing both the input value and sqrt result into the extended + wrapper so there is no need to recompute. + + This tactic avoids the overhead of stacking a frame for the normal + (non-error) case. Until gcc supports prologue shrink-wrapping + this is the best we can do. */ + + .section ".text" + .machine power4 +EALIGN (__sqrtf, 5, 0) + fabs fp0,fp1 + fsqrts fp2,fp1 + fcmpu cr1,fp0,fp1 + bne- cr1,.Lw_sqrtf + fmr fp1,fp2 + blr + .align 4 +.Lw_sqrtf: + mflr r0 + stwu r1,-16(r1) + cfi_adjust_cfa_offset(16) + fmr fp12,fp2 + stw r0,20(r1) + stw r30,8(r1) + cfi_offset(lr,20-16) + cfi_offset(r30,8-16) +#ifdef SHARED + SETUP_GOT_ACCESS(r30,got_label) + addis r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +#else + lis r9,_LIB_VERSION@ha + lwz r0,_LIB_VERSION@l(r9) +#endif +/* if (_LIB_VERSION == _IEEE_) return z; */ + cmpwi cr7,r0,-1 + beq- cr7,.L4 +/* if (x != x, 0) return z; !isnan */ + fcmpu cr7,fp1,fp1 + bne- cr7,.L4 +/* if (x < 0.0) + return __kernel_standard (x, x, 126) */ + fmr fp2,fp1 + li r3,126 + bne- cr1,.L11 +.L4: + lwz r0,20(r1) + fmr fp1,fp12 + lwz r30,8(r1) + addi r1,r1,16 + mtlr r0 + blr +.L11: + bl __kernel_standard@plt + fmr fp12,fp1 + b .L4 + END (__sqrtf) + +weak_alias (__sqrtf, sqrtf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/multiarch/Implies new file mode 100644 index 0000000000..d29e3853ab --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power4/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/Implies new file mode 100644 index 0000000000..8e5b58a57a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power5+/fpu +powerpc/powerpc32/power5+ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/multiarch/Implies new file mode 100644 index 0000000000..c66805ee63 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power5+/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_copysign.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_copysign.S new file mode 100644 index 0000000000..d6cc8011ae --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_copysign.S @@ -0,0 +1,58 @@ +/* copysign(). PowerPC32/POWER6 version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* double [f1] copysign (double [f1] x, double [f2] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + + .section ".text" + .type __copysign, @function + .machine power6 +EALIGN (__copysign, 4, 0) + CALL_MCOUNT + fcpsgn fp1,fp2,fp1 + blr +END (__copysign) + +hidden_def (__copysign) +weak_alias (__copysign, copysign) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__copysign, __copysignf) +hidden_def (__copysignf) +weak_alias (__copysignf, copysignf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__copysign, __copysignl) +weak_alias (__copysign, copysignl) +#endif + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, copysign, copysignl, GLIBC_2_0) +# endif +#else +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, copysign, copysignl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_copysignf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_copysignf.S new file mode 100644 index 0000000000..d4aa702d07 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_copysignf.S @@ -0,0 +1 @@ +/* This function uses the same code as s_copysign.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S new file mode 100644 index 0000000000..5b19433cbf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S @@ -0,0 +1,61 @@ +/* isnan(). PowerPC32 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power6 +EALIGN (__isnan, 4, 0) + stwu r1,-32(r1) + cfi_adjust_cfa_offset (32) + ori r1,r1,0 + stfd fp1,24(r1) /* copy FPR to GPR */ + ori r1,r1,0 + lwz r4,24+HIWORD(r1) + lwz r5,24+LOWORD(r1) + lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */ + clrlwi r4,r4,1 /* x = fabs(x) */ + cmpw cr7,r4,r0 /* if (fabs(x) =< inf) */ + cmpwi cr6,r5,0 + li r3,0 /* then return 0 */ + addi r1,r1,32 + cfi_adjust_cfa_offset (-32) + bltlr+ cr7 + bgt- cr7,L(NaN) + beqlr+ cr6 +L(NaN): + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S new file mode 100644 index 0000000000..7a19ed86d2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S @@ -0,0 +1,44 @@ +/* isnanf(). PowerPC32 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnanf(x) */ + .machine power6 +EALIGN (__isnanf, 4, 0) + stwu r1,-32(r1) + cfi_adjust_cfa_offset (32) + ori r1,r1,0 + stfs fp1,24(r1) /* copy FPR to GPR */ + ori r1,r1,0 + lwz r4,24(r1) + lis r0,0x7f80 /* const long r0 0x7f800000 */ + clrlwi r4,r4,1 /* x = fabs(x) */ + cmpw cr7,r4,r0 /* if (fabs(x) =< inf) */ + li r3,0 /* then return 0 */ + addi r1,r1,32 + cfi_adjust_cfa_offset (-32) + blelr+ cr7 +L(NaN): + li r3,1 /* else return 1 */ + blr + END (__isnanf) + +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S new file mode 100644 index 0000000000..326e77361b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S @@ -0,0 +1,46 @@ +/* Round double to long int. PowerPC32 on PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long long int[r3, r4] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + fctid fp13,fp1 + stfd fp13,8(r1) +/* Insure the following load is in a different dispatch group by + inserting "group ending nop". */ + ori r1,r1,0 + lwz r3,8+HIWORD(r1) + lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llrint) + +weak_alias (__llrint, llrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S new file mode 100644 index 0000000000..0950e7e7c7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S @@ -0,0 +1,38 @@ +/* Round float to long int. PowerPC32 on PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* long long int[r3, r4] __llrintf (float x[fp1]) */ +ENTRY (__llrintf) + CALL_MCOUNT + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + fctid fp13,fp1 + stfd fp13,8(r1) +/* Insure the following load is in a different dispatch group by + inserting "group ending nop". */ + ori r1,r1,0 + lwz r3,8+HIWORD(r1) + lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llrintf) + +weak_alias (__llrintf, llrintf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S new file mode 100644 index 0000000000..83ba999a39 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S @@ -0,0 +1,59 @@ +/* lround function. POWER5+, PowerPC32 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long [r3] llround (float x [fp1]) + IEEE 1003.1 lround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we pre-round using the V2.02 Floating Round to Integer Nearest + instruction before we use the Floating Convert to Integer Word with + round to zero instruction. */ + + .machine "power5" +ENTRY (__llround) + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + frin fp2,fp1 + fctidz fp3,fp2 /* Convert To Integer Word lround toward 0. */ + stfd fp3,8(r1) +/* Insure the following load is in a different dispatch group by + inserting "group ending nop". */ + ori r1,r1,0 + lwz r3,8+HIWORD(r1) + lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llround) + +weak_alias (__llround, llround) + +strong_alias (__llround, __llroundf) +weak_alias (__llround, llroundf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S new file mode 100644 index 0000000000..030d2fdff8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S @@ -0,0 +1 @@ +/* __llroundf is in s_llround.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/memcpy.S new file mode 100644 index 0000000000..81b62cba21 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/memcpy.S @@ -0,0 +1,907 @@ +/* Optimized memcpy implementation for PowerPC32 on POWER6. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + + Memcpy handles short copies (< 32-bytes) using a binary move blocks + (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled + with the appropriate combination of byte and halfword load/stores. + There is minimal effort to optimize the alignment of short moves. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination word (4-byte) aligned. Further optimization is + possible when both source and destination are word aligned. + Each case has an optimized unrolled loop. */ + + .machine power6 +EALIGN (memcpy, 5, 0) + CALL_MCOUNT + + stwu 1,-32(1) + cfi_adjust_cfa_offset(32) + cmplwi cr1,5,31 /* check for short move. */ + neg 0,3 + cmplwi cr1,5,31 + clrlwi 10,4,30 /* check alignment of src. */ + andi. 11,3,3 /* check alignment of dst. */ + clrlwi 0,0,30 /* Number of bytes until the 1st word of dst. */ + ble- cr1,L(word_unaligned_short) /* If move < 32 bytes. */ + cmplw cr6,10,11 + stw 31,24(1) + stw 30,20(1) + cfi_offset(31,(24-32)) + cfi_offset(30,(20-32)) + mr 30,3 + beq .L0 + mtcrf 0x01,0 + subf 31,0,5 /* Length after alignment. */ + add 12,4,0 /* Compute src addr after alignment. */ + /* Move 0-3 bytes as needed to get the destination word aligned. */ +1: bf 31,2f + lbz 6,0(4) + bf 30,3f + lhz 7,1(4) + stb 6,0(3) + sth 7,1(3) + addi 3,3,3 + b 0f +3: + stb 6,0(3) + addi 3,3,1 + b 0f +2: bf 30,0f + lhz 6,0(4) + sth 6,0(3) + addi 3,3,2 +0: + clrlwi 10,12,30 /* check alignment of src again. */ + srwi 9,31,2 /* Number of full words remaining. */ + bne- cr6,L(wdu) /* If source is not word aligned. .L6 */ + clrlwi 11,31,30 /* calculate the number of tail bytes */ + b L(word_aligned) + /* Copy words from source to destination, assuming the destination is + aligned on a word boundary. + + At this point we know there are at least 29 bytes left (32-3) to copy. + The next step is to determine if the source is also word aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. + + Otherwise source and destination are word aligned, and we can use + the optimized word copy loop. */ + .align 4 +.L0: + mr 31,5 + mr 12,4 + bne- cr6,L(wdu) /* If source is not word aligned. .L6 */ + srwi 9,5,2 /* Number of full words remaining. */ + clrlwi 11,5,30 /* calculate the number of tail bytes */ + + /* Move words where destination and source are word aligned. + Use an unrolled loop to copy 4 words (16-bytes) per iteration. + If the copy is not an exact multiple of 16 bytes, 1-3 + words are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-3 bytes. These bytes are + copied a halfword/byte at a time as needed to preserve alignment. */ +L(word_aligned): + mtcrf 0x01,9 + srwi 8,31,4 /* calculate the 16 byte loop count */ + cmplwi cr1,9,4 + cmplwi cr6,11,0 + mr 11,12 + + bf 30,1f + lwz 6,0(12) + lwz 7,4(12) + addi 11,12,8 + mtctr 8 + stw 6,0(3) + stw 7,4(3) + addi 10,3,8 + bf 31,4f + lwz 0,8(12) + stw 0,8(3) + blt cr1,3f + addi 11,12,12 + addi 10,3,12 + b 4f + .align 4 +1: + mr 10,3 + mtctr 8 + bf 31,4f + lwz 6,0(12) + addi 11,12,4 + stw 6,0(3) + addi 10,3,4 + + .align 4 +4: + lwz 6,0(11) + lwz 7,4(11) + lwz 8,8(11) + lwz 0,12(11) + stw 6,0(10) + stw 7,4(10) + stw 8,8(10) + stw 0,12(10) + addi 11,11,16 + addi 10,10,16 + bdnz 4b +3: + clrrwi 0,31,2 + mtcrf 0x01,31 + beq cr6,0f +.L9: + add 3,3,0 + add 12,12,0 + +/* At this point we have a tail of 0-3 bytes and we know that the + destination is word aligned. */ +2: bf 30,1f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + +/* Copy up to 31 bytes. This divided into two cases 0-8 bytes and 9-31 + bytes. Each case is handled without loops, using binary (1,2,4,8) + tests. + + In the short (0-8 byte) case no attempt is made to force alignment + of either source or destination. The hardware will handle the + unaligned load/stores with small delays for crossing 32- 128-byte, + and 4096-byte boundaries. Since these short moves are unlikely to be + unaligned or cross these boundaries, the overhead to force + alignment is not justified. + + The longer (9-31 byte) move is more likely to cross 32- or 128-byte + boundaries. Since only loads are sensitive to the 32-/128-byte + boundaries it is more important to align the source then the + destination. If the source is not already word aligned, we first + move 1-3 bytes as needed. Since we are only word aligned we don't + use double word load/stores to insure that all loads are aligned. + While the destination and stores may still be unaligned, this + is only an issue for page (4096 byte boundary) crossing, which + should be rare for these short moves. The hardware handles this + case automatically with a small (~20 cycle) delay. */ + .align 4 + + cfi_same_value (31) + cfi_same_value (30) +L(word_unaligned_short): + mtcrf 0x01,5 + cmplwi cr6,5,8 + neg 8,4 + clrrwi 9,4,2 + andi. 0,8,3 + beq cr6,L(wus_8) /* Handle moves of 8 bytes. */ +/* At least 9 bytes left. Get the source word aligned. */ + cmplwi cr1,5,16 + mr 12,4 + ble cr6,L(wus_4) /* Handle moves of 0-8 bytes. */ + mr 11,3 + mr 10,5 + cmplwi cr6,0,2 + beq L(wus_tail) /* If the source is already word aligned skip this. */ +/* Copy 1-3 bytes to get source address word aligned. */ + lwz 6,0(9) + subf 10,0,5 + add 12,4,0 + blt cr6,5f + srwi 7,6,16 + bgt cr6,3f +#ifdef __LITTLE_ENDIAN__ + sth 7,0(3) +#else + sth 6,0(3) +#endif + b 7f + .align 4 +3: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,24 + stb 6,0(3) + sth 7,1(3) +#else + stb 7,0(3) + sth 6,1(3) +#endif + b 7f + .align 4 +5: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,8 +#endif + stb 6,0(3) +7: + cmplwi cr1,10,16 + add 11,3,0 + mtcrf 0x01,10 + .align 4 +L(wus_tail): +/* At least 6 bytes left and the source is word aligned. This allows + some speculative loads up front. */ +/* We need to special case the fall-through because the biggest delays + are due to address computation not being ready in time for the + AGEN. */ + lwz 6,0(12) + lwz 7,4(12) + blt cr1,L(wus_tail8) + cmplwi cr0,10,24 +L(wus_tail16): /* Move 16 bytes. */ + stw 6,0(11) + stw 7,4(11) + lwz 6,8(12) + lwz 7,12(12) + stw 6,8(11) + stw 7,12(11) +/* Move 8 bytes more. */ + bf 28,L(wus_tail16p8) + cmplwi cr1,10,28 + lwz 6,16(12) + lwz 7,20(12) + stw 6,16(11) + stw 7,20(11) +/* Move 4 bytes more. */ + bf 29,L(wus_tail16p4) + lwz 6,24(12) + stw 6,24(11) + addi 12,12,28 + addi 11,11,28 + bgt cr1,L(wus_tail2) + /* exactly 28 bytes. Return original dst pointer and exit. */ + addi 1,1,32 + blr + .align 4 +L(wus_tail16p8): /* less than 8 bytes left. */ + beq cr1,L(wus_tailX) /* exactly 16 bytes, early exit. */ + cmplwi cr1,10,20 + bf 29,L(wus_tail16p2) +/* Move 4 bytes more. */ + lwz 6,16(12) + stw 6,16(11) + addi 12,12,20 + addi 11,11,20 + bgt cr1,L(wus_tail2) + /* exactly 20 bytes. Return original dst pointer and exit. */ + addi 1,1,32 + blr + .align 4 +L(wus_tail16p4): /* less than 4 bytes left. */ + addi 12,12,24 + addi 11,11,24 + bgt cr0,L(wus_tail2) + /* exactly 24 bytes. Return original dst pointer and exit. */ + addi 1,1,32 + blr + .align 4 +L(wus_tail16p2): /* 16 bytes moved, less than 4 bytes left. */ + addi 12,12,16 + addi 11,11,16 + b L(wus_tail2) + + .align 4 +L(wus_tail8): /* Move 8 bytes. */ +/* r6, r7 already loaded speculatively. */ + cmplwi cr1,10,8 + cmplwi cr0,10,12 + bf 28,L(wus_tail4) + stw 6,0(11) + stw 7,4(11) +/* Move 4 bytes more. */ + bf 29,L(wus_tail8p4) + lwz 6,8(12) + stw 6,8(11) + addi 12,12,12 + addi 11,11,12 + bgt cr0,L(wus_tail2) + /* exactly 12 bytes. Return original dst pointer and exit. */ + addi 1,1,32 + blr + .align 4 +L(wus_tail8p4): /* less than 4 bytes left. */ + addi 12,12,8 + addi 11,11,8 + bgt cr1,L(wus_tail2) + /* exactly 8 bytes. Return original dst pointer and exit. */ + addi 1,1,32 + blr + + .align 4 +L(wus_tail4): /* Move 4 bytes. */ +/* r6 already loaded speculatively. If we are here we know there is + more than 4 bytes left. So there is no need to test. */ + addi 12,12,4 + stw 6,0(11) + addi 11,11,4 +L(wus_tail2): /* Move 2-3 bytes. */ + bf 30,L(wus_tail1) + lhz 6,0(12) + sth 6,0(11) + bf 31,L(wus_tailX) + lbz 7,2(12) + stb 7,2(11) + addi 1,1,32 + blr +L(wus_tail1): /* Move 1 byte. */ + bf 31,L(wus_tailX) + lbz 6,0(12) + stb 6,0(11) +L(wus_tailX): + /* Return original dst pointer. */ + addi 1,1,32 + blr + +/* Special case to copy 0-8 bytes. */ + .align 4 +L(wus_8): + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + /* Return original dst pointer. */ + addi 1,1,32 + blr + .align 4 +L(wus_4): + bf 29,L(wus_2) + lwz 6,0(4) + stw 6,0(3) + bf 30,L(wus_5) + lhz 7,4(4) + sth 7,4(3) + bf 31,L(wus_0) + lbz 8,6(4) + stb 8,6(3) + addi 1,1,32 + blr + .align 4 +L(wus_5): + bf 31,L(wus_0) + lbz 6,4(4) + stb 6,4(3) + /* Return original dst pointer. */ + addi 1,1,32 + blr + .align 4 +L(wus_2): /* Move 2-3 bytes. */ + bf 30,L(wus_1) + lhz 6,0(4) + sth 6,0(3) + bf 31,L(wus_0) + lbz 7,2(4) + stb 7,2(3) + addi 1,1,32 + blr + .align 4 +L(wus_1): /* Move 1 byte. */ + bf 31,L(wus_0) + lbz 6,0(4) + stb 6,0(3) + .align 3 +L(wus_0): + /* Return original dst pointer. */ + addi 1,1,32 + blr + + .align 4 + cfi_offset(31,(24-32)) + cfi_offset(30,(20-32)) +L(wdu): + + /* Copy words where the destination is aligned but the source is + not. For power4, power5 and power6 machines there is penalty for + unaligned loads (src) that cross 32-byte, cacheline, or page + boundaries. So we want to use simple (unaligned) loads where + possible but avoid them where we know the load would span a 32-byte + boundary. + + At this point we know we have at least 29 (32-3) bytes to copy + the src is unaligned. and we may cross at least one 32-byte + boundary. Also we have the following register values: + r3 == adjusted dst, word aligned + r4 == unadjusted src + r5 == unadjusted len + r9 == adjusted Word length + r10 == src alignment (1-3) + r12 == adjusted src, not aligned + r31 == adjusted len + + First we need to copy word up to but not crossing the next 32-byte + boundary. Then perform aligned loads just before and just after + the boundary and use shifts and or to generate the next aligned + word for dst. If more than 32 bytes remain we copy (unaligned src) + the next 7 words and repeat the loop until less than 32-bytes + remain. + + Then if more than 4 bytes remain we again use aligned loads, + shifts and or to generate the next dst word. We then process the + remaining words using unaligned loads as needed. Finally we check + if there are more than 0 bytes (1-3) bytes remaining and use + halfword and or byte load/stores to complete the copy. +*/ + mr 4,12 /* restore unaligned adjusted src ptr */ + clrlwi 0,12,27 /* Find dist from previous 32-byte boundary. */ + slwi 10,10,3 /* calculate number of bits to shift 1st word left */ + cmplwi cr5,0,16 + subfic 8,0,32 /* Number of bytes to next 32-byte boundary. */ + + mtcrf 0x01,8 + cmplwi cr1,10,16 + subfic 9,10,32 /* number of bits to shift 2nd word right */ +/* This test is reversed because the timing to compare the bytes to + 32-byte boundary could not be meet. So we compare the bytes from + previous 32-byte boundary and invert the test. */ + bge cr5,L(wdu_h32_8) + .align 4 + lwz 6,0(4) + lwz 7,4(4) + addi 12,4,16 /* generate alternate pointers to avoid agen */ + addi 11,3,16 /* timing issues downstream. */ + stw 6,0(3) + stw 7,4(3) + subi 31,31,16 + lwz 6,8(4) + lwz 7,12(4) + addi 4,4,16 + stw 6,8(3) + stw 7,12(3) + addi 3,3,16 + bf 28,L(wdu_h32_4) + lwz 6,0(12) + lwz 7,4(12) + subi 31,31,8 + addi 4,4,8 + stw 6,0(11) + stw 7,4(11) + addi 3,3,8 + bf 29,L(wdu_h32_0) + lwz 6,8(12) + addi 4,4,4 + subi 31,31,4 + stw 6,8(11) + addi 3,3,4 + b L(wdu_h32_0) + .align 4 +L(wdu_h32_8): + bf 28,L(wdu_h32_4) + lwz 6,0(4) + lwz 7,4(4) + subi 31,31,8 + bf 29,L(wdu_h32_8x) + stw 6,0(3) + stw 7,4(3) + lwz 6,8(4) + addi 4,4,12 + subi 31,31,4 + stw 6,8(3) + addi 3,3,12 + b L(wdu_h32_0) + .align 4 +L(wdu_h32_8x): + addi 4,4,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 + b L(wdu_h32_0) + .align 4 +L(wdu_h32_4): + bf 29,L(wdu_h32_0) + lwz 6,0(4) + subi 31,31,4 + addi 4,4,4 + stw 6,0(3) + addi 3,3,4 + .align 4 +L(wdu_h32_0): +/* set up for 32-byte boundary crossing word move and possibly 32-byte + move loop. */ + clrrwi 12,4,2 + cmplwi cr5,31,32 + bge cr1,L(wdu2_32) +#if 0 + b L(wdu1_32) +/* + cmplwi cr1,10,8 + beq cr1,L(wdu1_32) + cmplwi cr1,10,16 + beq cr1,L(wdu2_32) + cmplwi cr1,10,24 + beq cr1,L(wdu3_32) +*/ +L(wdu_32): + lwz 6,0(12) + cmplwi cr6,31,4 + srwi 8,31,5 /* calculate the 32 byte loop count */ + slw 0,6,10 + clrlwi 31,31,27 /* The remaining bytes, < 32. */ + blt cr5,L(wdu_32tail) + mtctr 8 + cmplwi cr6,31,4 + .align 4 +L(wdu_loop32): + /* copy 32 bytes at a time */ + lwz 8,4(12) + addi 12,12,32 + lwz 7,4(4) + srw 8,8,9 + or 0,0,8 + stw 0,0(3) + stw 7,4(3) + lwz 6,8(4) + lwz 7,12(4) + stw 6,8(3) + stw 7,12(3) + lwz 6,16(4) + lwz 7,20(4) + stw 6,16(3) + stw 7,20(3) + lwz 6,24(4) + lwz 7,28(4) + lwz 8,0(12) + addi 4,4,32 + stw 6,24(3) + stw 7,28(3) + addi 3,3,32 + slw 0,8,10 + bdnz+ L(wdu_loop32) + +L(wdu_32tail): + mtcrf 0x01,31 + cmplwi cr5,31,16 + blt cr6,L(wdu_4tail) + /* calculate and store the final word */ + lwz 8,4(12) + srw 8,8,9 + or 6,0,8 + b L(wdu_32tailx) +#endif + .align 4 +L(wdu1_32): + lwz 6,-1(4) + cmplwi cr6,31,4 + srwi 8,31,5 /* calculate the 32 byte loop count */ +#ifdef __LITTLE_ENDIAN__ + srwi 6,6,8 +#else + slwi 6,6,8 +#endif + clrlwi 31,31,27 /* The remaining bytes, < 32. */ + blt cr5,L(wdu1_32tail) + mtctr 8 + cmplwi cr6,31,4 + + lwz 8,3(4) + lwz 7,4(4) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,24,32 +#else +/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */ + rlwimi 6,8,8,(32-8),31 +#endif + b L(wdu1_loop32x) + .align 4 +L(wdu1_loop32): + /* copy 32 bytes at a time */ + lwz 8,3(4) + lwz 7,4(4) + stw 10,-8(3) + stw 11,-4(3) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,24,32 +#else +/* Equivalent to srwi 8,8,32-8; or 6,6,8 */ + rlwimi 6,8,8,(32-8),31 +#endif +L(wdu1_loop32x): + lwz 10,8(4) + lwz 11,12(4) + stw 6,0(3) + stw 7,4(3) + lwz 6,16(4) + lwz 7,20(4) + stw 10,8(3) + stw 11,12(3) + lwz 10,24(4) + lwz 11,28(4) + lwz 8,32-1(4) + addi 4,4,32 + stw 6,16(3) + stw 7,20(3) + addi 3,3,32 +#ifdef __LITTLE_ENDIAN__ + srwi 6,8,8 +#else + slwi 6,8,8 +#endif + bdnz+ L(wdu1_loop32) + stw 10,-8(3) + stw 11,-4(3) + +L(wdu1_32tail): + mtcrf 0x01,31 + cmplwi cr5,31,16 + blt cr6,L(wdu_4tail) + /* calculate and store the final word */ + lwz 8,3(4) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,24,32 +#else +/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */ + rlwimi 6,8,8,(32-8),31 +#endif + b L(wdu_32tailx) + +L(wdu2_32): + bgt cr1,L(wdu3_32) + lwz 6,-2(4) + cmplwi cr6,31,4 + srwi 8,31,5 /* calculate the 32 byte loop count */ +#ifdef __LITTLE_ENDIAN__ + srwi 6,6,16 +#else + slwi 6,6,16 +#endif + clrlwi 31,31,27 /* The remaining bytes, < 32. */ + blt cr5,L(wdu2_32tail) + mtctr 8 + cmplwi cr6,31,4 + + lwz 8,2(4) + lwz 7,4(4) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,16,32 +#else + rlwimi 6,8,16,(32-16),31 +#endif + b L(wdu2_loop32x) + .align 4 +L(wdu2_loop32): + /* copy 32 bytes at a time */ + lwz 8,2(4) + lwz 7,4(4) + stw 10,-8(3) + stw 11,-4(3) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,16,32 +#else + rlwimi 6,8,16,(32-16),31 +#endif +L(wdu2_loop32x): + lwz 10,8(4) + lwz 11,12(4) + stw 6,0(3) + stw 7,4(3) + lwz 6,16(4) + lwz 7,20(4) + stw 10,8(3) + stw 11,12(3) + lwz 10,24(4) + lwz 11,28(4) +/* lwz 8,0(12) */ + lwz 8,32-2(4) + addi 4,4,32 + stw 6,16(3) + stw 7,20(3) + addi 3,3,32 +#ifdef __LITTLE_ENDIAN__ + srwi 6,8,16 +#else + slwi 6,8,16 +#endif + bdnz+ L(wdu2_loop32) + stw 10,-8(3) + stw 11,-4(3) + +L(wdu2_32tail): + mtcrf 0x01,31 + cmplwi cr5,31,16 + blt cr6,L(wdu_4tail) + /* calculate and store the final word */ + lwz 8,2(4) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,16,32 +#else + rlwimi 6,8,16,(32-16),31 +#endif + b L(wdu_32tailx) + +L(wdu3_32): +/* lwz 6,0(12) */ + lwz 6,-3(4) + cmplwi cr6,31,4 + srwi 8,31,5 /* calculate the 32 byte loop count */ +#ifdef __LITTLE_ENDIAN__ + srwi 6,6,24 +#else + slwi 6,6,24 +#endif + clrlwi 31,31,27 /* The remaining bytes, < 32. */ + blt cr5,L(wdu3_32tail) + mtctr 8 + cmplwi cr6,31,4 + + lwz 8,1(4) + lwz 7,4(4) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,8,32 +#else + rlwimi 6,8,24,(32-24),31 +#endif + b L(wdu3_loop32x) + .align 4 +L(wdu3_loop32): + /* copy 32 bytes at a time */ + lwz 8,1(4) + lwz 7,4(4) + stw 10,-8(3) + stw 11,-4(3) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,8,32 +#else + rlwimi 6,8,24,(32-24),31 +#endif +L(wdu3_loop32x): + lwz 10,8(4) + lwz 11,12(4) + stw 6,0(3) + stw 7,4(3) + lwz 6,16(4) + lwz 7,20(4) + stw 10,8(3) + stw 11,12(3) + lwz 10,24(4) + lwz 11,28(4) + lwz 8,32-3(4) + addi 4,4,32 + stw 6,16(3) + stw 7,20(3) + addi 3,3,32 +#ifdef __LITTLE_ENDIAN__ + srwi 6,8,24 +#else + slwi 6,8,24 +#endif + bdnz+ L(wdu3_loop32) + stw 10,-8(3) + stw 11,-4(3) + +L(wdu3_32tail): + mtcrf 0x01,31 + cmplwi cr5,31,16 + blt cr6,L(wdu_4tail) + /* calculate and store the final word */ + lwz 8,1(4) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,8,32 +#else + rlwimi 6,8,24,(32-24),31 +#endif + b L(wdu_32tailx) + .align 4 +L(wdu_32tailx): + blt cr5,L(wdu_t32_8) + lwz 7,4(4) + addi 12,4,16 /* generate alternate pointers to avoid agen */ + addi 11,3,16 /* timing issues downstream. */ + stw 6,0(3) + stw 7,4(3) + subi 31,31,16 + lwz 6,8(4) + lwz 7,12(4) + addi 4,4,16 + stw 6,8(3) + stw 7,12(3) + addi 3,3,16 + bf 28,L(wdu_t32_4x) + lwz 6,0(12) + lwz 7,4(12) + addi 4,4,8 + subi 31,31,8 + stw 6,0(11) + stw 7,4(11) + addi 3,3,8 + bf 29,L(wdu_t32_0) + lwz 6,8(12) + addi 4,4,4 + subi 31,31,4 + stw 6,8(11) + addi 3,3,4 + b L(wdu_t32_0) + .align 4 +L(wdu_t32_4x): + bf 29,L(wdu_t32_0) + lwz 6,0(4) + addi 4,4,4 + subi 31,31,4 + stw 6,0(3) + addi 3,3,4 + b L(wdu_t32_0) + .align 4 +L(wdu_t32_8): + bf 28,L(wdu_t32_4) + lwz 7,4(4) + subi 31,31,8 + bf 29,L(wdu_t32_8x) + stw 6,0(3) + stw 7,4(3) + lwz 6,8(4) + subi 31,31,4 + addi 4,4,12 + stw 6,8(3) + addi 3,3,12 + b L(wdu_t32_0) + .align 4 +L(wdu_t32_8x): + addi 4,4,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 + b L(wdu_t32_0) + .align 4 +L(wdu_t32_4): + subi 31,31,4 + stw 6,0(3) + addi 4,4,4 + addi 3,3,4 + .align 4 +L(wdu_t32_0): +L(wdu_4tail): + cmplwi cr6,31,0 + beq cr6,L(wdus_0) /* If the tail is 0 bytes we are done! */ + bf 30,L(wdus_3) + lhz 7,0(4) + sth 7,0(3) + bf 31,L(wdus_0) + lbz 8,2(4) + stb 8,2(3) + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + .align 4 +L(wdus_3): + bf 31,L(wus_0) + lbz 6,0(4) + stb 6,0(3) + .align 4 +L(wdus_0): + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr +END (memcpy) + +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/memset.S new file mode 100644 index 0000000000..f221c32d2f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/memset.S @@ -0,0 +1,539 @@ +/* Optimized 32-bit memset implementation for POWER6. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. + + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (1024 bits). There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. */ + + .machine power6 +EALIGN (memset, 7, 0) + CALL_MCOUNT + +#define rTMP r0 +#define rRTN r3 /* Initial value of 1st argument. */ +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ +#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ +#define rMEMP2 r8 + +#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */ +#define rMEMP3 r9 /* Alt mem pointer. */ +L(_memset): +/* Take care of case for size <= 4. */ + cmplwi cr1, rLEN, 4 + andi. rALIGN, rMEMP0, 3 + mr rMEMP, rMEMP0 + ble- cr1, L(small) +/* Align to word boundary. */ + cmplwi cr5, rLEN, 31 + insrwi rCHR, rCHR, 8, 16 /* Replicate byte to halfword. */ + beq+ L(aligned) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 4 + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): + sth rCHR, -2(rMEMP) + + .align 4 +/* Handle the case of size < 31. */ +L(aligned): + mtcrf 0x01, rLEN + insrwi rCHR, rCHR, 16, 0 /* Replicate halfword to word. */ + ble cr5, L(medium) +/* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x1C + subfic rALIGN, rALIGN, 0x20 + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stw rCHR, -4(rMEMP2) + stwu rCHR, -8(rMEMP2) + nop +L(a1): blt cr1, L(a2) + stw rCHR, -4(rMEMP2) + stw rCHR, -8(rMEMP2) + stw rCHR, -12(rMEMP2) + stwu rCHR, -16(rMEMP2) +L(a2): bf 29, L(caligned) + stw rCHR, -4(rMEMP2) + + .align 3 +/* Now aligned to a 32 byte boundary. */ +L(caligned): + cmplwi cr1, rCHR, 0 + clrrwi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN + beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */ +L(nondcbz): + beq L(medium) /* We may not actually get to do a full line. */ + nop +/* Storing a non-zero "c" value. We are aligned at a sector (32-byte) + boundary may not be at cache line (128-byte) boundary. */ +L(nzloopstart): +/* memset in 32-byte chunks until we get to a cache line boundary. + If rLEN is less than the distance to the next cache-line boundary use + cacheAligned1 code to finish the tail. */ + cmplwi cr1,rLEN,128 + + andi. rTMP,rMEMP,127 + blt cr1,L(cacheAligned1) + addi rMEMP3,rMEMP,32 + beq L(nzCacheAligned) + addi rLEN,rLEN,-32 + stw rCHR,0(rMEMP) + stw rCHR,4(rMEMP) + stw rCHR,8(rMEMP) + stw rCHR,12(rMEMP) + stw rCHR,16(rMEMP) + stw rCHR,20(rMEMP) + addi rMEMP,rMEMP,32 + andi. rTMP,rMEMP3,127 + stw rCHR,-8(rMEMP3) + stw rCHR,-4(rMEMP3) + + beq L(nzCacheAligned) + addi rLEN,rLEN,-32 + stw rCHR,0(rMEMP3) + stw rCHR,4(rMEMP3) + addi rMEMP,rMEMP,32 + stw rCHR,8(rMEMP3) + stw rCHR,12(rMEMP3) + andi. rTMP,rMEMP,127 + stw rCHR,16(rMEMP3) + stw rCHR,20(rMEMP3) + stw rCHR,24(rMEMP3) + stw rCHR,28(rMEMP3) + + beq L(nzCacheAligned) + addi rLEN,rLEN,-32 +/* At this point we can overrun the store queue (pipe reject) so it is + time to slow things down. The store queue can merge two adjacent + stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU. + So we add "group ending nops" to guarantee that we dispatch only two + stores every other cycle. */ + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,32(rMEMP3) + stw rCHR,36(rMEMP3) + addi rMEMP,rMEMP,32 + cmplwi cr1,rLEN,128 + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,40(rMEMP3) + stw rCHR,44(rMEMP3) + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,48(rMEMP3) + stw rCHR,52(rMEMP3) + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,56(rMEMP3) + stw rCHR,60(rMEMP3) + blt cr1,L(cacheAligned1) + b L(nzCacheAligned) + +/* Now we are aligned to the cache line and can use dcbtst. */ + .align 5 +L(nzCacheAligned): + cmplwi cr1,rLEN,128 + cmplwi cr6,rLEN,256 + blt cr1,L(cacheAligned1) + blt cr6,L(nzCacheAligned128) + .align 4 +L(nzCacheAligned128): + nop + addi rMEMP3,rMEMP,64 + stw rCHR,0(rMEMP) + stw rCHR,4(rMEMP) + stw rCHR,8(rMEMP) + stw rCHR,12(rMEMP) + stw rCHR,16(rMEMP) + stw rCHR,20(rMEMP) + stw rCHR,24(rMEMP) + stw rCHR,28(rMEMP) + stw rCHR,32(rMEMP) + stw rCHR,36(rMEMP) + stw rCHR,40(rMEMP) + stw rCHR,44(rMEMP) + stw rCHR,48(rMEMP) + stw rCHR,52(rMEMP) + stw rCHR,56(rMEMP) + stw rCHR,60(rMEMP) + addi rMEMP,rMEMP3,64 + addi rLEN,rLEN,-128 +/* At this point we can overrun the store queue (pipe reject) so it is + time to slow things down. The store queue can merge two adjacent + stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU. + So we add "group ending nops" to guarantee that we dispatch only one + store per cycle. */ + stw rCHR,0(rMEMP3) + ori r1,r1,0 + stw rCHR,4(rMEMP3) + ori r1,r1,0 + stw rCHR,8(rMEMP3) + ori r1,r1,0 + stw rCHR,12(rMEMP3) + ori r1,r1,0 + stw rCHR,16(rMEMP3) + ori r1,r1,0 + stw rCHR,20(rMEMP3) + ori r1,r1,0 + stw rCHR,24(rMEMP3) + ori r1,r1,0 + stw rCHR,28(rMEMP3) + ori r1,r1,0 + stw rCHR,32(rMEMP3) + ori r1,r1,0 + stw rCHR,36(rMEMP3) + ori r1,r1,0 + stw rCHR,40(rMEMP3) + ori r1,r1,0 + stw rCHR,44(rMEMP3) + ori r1,r1,0 + stw rCHR,48(rMEMP3) + ori r1,r1,0 + stw rCHR,52(rMEMP3) + ori r1,r1,0 + stw rCHR,56(rMEMP3) + ori r1,r1,0 + stw rCHR,60(rMEMP3) + blt cr6,L(cacheAligned1) +#if IS_IN (libc) + lfd 0,-128(rMEMP) +#endif + b L(nzCacheAligned256) + .align 5 +L(nzCacheAligned256): + cmplwi cr1,rLEN,256 + addi rMEMP3,rMEMP,64 +#if !IS_IN (libc) +/* When we are not in libc we should use only GPRs to avoid the FPU lock + interrupt. */ + stw rCHR,0(rMEMP) + stw rCHR,4(rMEMP) + stw rCHR,8(rMEMP) + stw rCHR,12(rMEMP) + stw rCHR,16(rMEMP) + stw rCHR,20(rMEMP) + stw rCHR,24(rMEMP) + stw rCHR,28(rMEMP) + stw rCHR,32(rMEMP) + stw rCHR,36(rMEMP) + stw rCHR,40(rMEMP) + stw rCHR,44(rMEMP) + stw rCHR,48(rMEMP) + stw rCHR,52(rMEMP) + stw rCHR,56(rMEMP) + stw rCHR,60(rMEMP) + addi rMEMP,rMEMP3,64 + addi rLEN,rLEN,-128 + stw rCHR,0(rMEMP3) + stw rCHR,4(rMEMP3) + stw rCHR,8(rMEMP3) + stw rCHR,12(rMEMP3) + stw rCHR,16(rMEMP3) + stw rCHR,20(rMEMP3) + stw rCHR,24(rMEMP3) + stw rCHR,28(rMEMP3) + stw rCHR,32(rMEMP3) + stw rCHR,36(rMEMP3) + stw rCHR,40(rMEMP3) + stw rCHR,44(rMEMP3) + stw rCHR,48(rMEMP3) + stw rCHR,52(rMEMP3) + stw rCHR,56(rMEMP3) + stw rCHR,60(rMEMP3) +#else +/* We are in libc and this is a long memset so we can use FPRs and can afford + occasional FPU locked interrupts. */ + stfd 0,0(rMEMP) + stfd 0,8(rMEMP) + stfd 0,16(rMEMP) + stfd 0,24(rMEMP) + stfd 0,32(rMEMP) + stfd 0,40(rMEMP) + stfd 0,48(rMEMP) + stfd 0,56(rMEMP) + addi rMEMP,rMEMP3,64 + addi rLEN,rLEN,-128 + stfd 0,0(rMEMP3) + stfd 0,8(rMEMP3) + stfd 0,16(rMEMP3) + stfd 0,24(rMEMP3) + stfd 0,32(rMEMP3) + stfd 0,40(rMEMP3) + stfd 0,48(rMEMP3) + stfd 0,56(rMEMP3) +#endif + bge cr1,L(nzCacheAligned256) + dcbtst 0,rMEMP + b L(cacheAligned1) + + .align 4 +/* Storing a zero "c" value. We are aligned at a sector (32-byte) + boundary but may not be at cache line (128-byte) boundary. If the + remaining length spans a full cache line we can use the Data cache + block zero instruction. */ +L(zloopstart): +/* memset in 32-byte chunks until we get to a cache line boundary. + If rLEN is less than the distance to the next cache-line boundary use + cacheAligned1 code to finish the tail. */ + cmplwi cr1,rLEN,128 + beq L(medium) +L(getCacheAligned): + andi. rTMP,rMEMP,127 + blt cr1,L(cacheAligned1) + addi rMEMP3,rMEMP,32 + beq L(cacheAligned) + addi rLEN,rLEN,-32 + stw rCHR,0(rMEMP) + stw rCHR,4(rMEMP) + stw rCHR,8(rMEMP) + stw rCHR,12(rMEMP) + stw rCHR,16(rMEMP) + stw rCHR,20(rMEMP) + addi rMEMP,rMEMP,32 + andi. rTMP,rMEMP3,127 + stw rCHR,-8(rMEMP3) + stw rCHR,-4(rMEMP3) +L(getCacheAligned2): + beq L(cacheAligned) + addi rLEN,rLEN,-32 + addi rMEMP,rMEMP,32 + stw rCHR,0(rMEMP3) + stw rCHR,4(rMEMP3) + stw rCHR,8(rMEMP3) + stw rCHR,12(rMEMP3) + andi. rTMP,rMEMP,127 + nop + stw rCHR,16(rMEMP3) + stw rCHR,20(rMEMP3) + stw rCHR,24(rMEMP3) + stw rCHR,28(rMEMP3) +L(getCacheAligned3): + beq L(cacheAligned) +/* At this point we can overrun the store queue (pipe reject) so it is + time to slow things down. The store queue can merge two adjacent + stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU. + So we add "group ending nops" to guarantee that we dispatch only two + stores every other cycle. */ + addi rLEN,rLEN,-32 + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,32(rMEMP3) + stw rCHR,36(rMEMP3) + addi rMEMP,rMEMP,32 + cmplwi cr1,rLEN,128 + ori r1,r1,0 + stw rCHR,40(rMEMP3) + stw rCHR,44(rMEMP3) + cmplwi cr6,rLEN,256 + li rMEMP2,128 + ori r1,r1,0 + stw rCHR,48(rMEMP3) + stw rCHR,52(rMEMP3) + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,56(rMEMP3) + stw rCHR,60(rMEMP3) + blt cr1,L(cacheAligned1) + blt cr6,L(cacheAligned128) + b L(cacheAlignedx) + +/* Now we are aligned to the cache line and can use dcbz. */ + .align 4 +L(cacheAligned): + cmplwi cr1,rLEN,128 + cmplwi cr6,rLEN,256 + blt cr1,L(cacheAligned1) + li rMEMP2,128 +L(cacheAlignedx): + cmplwi cr5,rLEN,640 + blt cr6,L(cacheAligned128) + bgt cr5,L(cacheAligned512) + cmplwi cr6,rLEN,512 + dcbz 0,rMEMP + cmplwi cr1,rLEN,384 + dcbz rMEMP2,rMEMP + addi rMEMP,rMEMP,256 + addi rLEN,rLEN,-256 + blt cr1,L(cacheAligned1) + blt cr6,L(cacheAligned128) + b L(cacheAligned256) + .align 5 +/* A simple loop for the longer (>640 bytes) lengths. This form limits + the branch miss-predicted to exactly 1 at loop exit.*/ +L(cacheAligned512): + cmplwi cr1,rLEN,128 + blt cr1,L(cacheAligned1) + dcbz 0,rMEMP + addi rLEN,rLEN,-128 + addi rMEMP,rMEMP,128 + b L(cacheAligned512) + .align 5 +L(cacheAligned256): + cmplwi cr6,rLEN,512 + dcbz 0,rMEMP + cmplwi cr1,rLEN,384 + dcbz rMEMP2,rMEMP + addi rMEMP,rMEMP,256 + addi rLEN,rLEN,-256 + bge cr6,L(cacheAligned256) + blt cr1,L(cacheAligned1) + .align 4 +L(cacheAligned128): + dcbz 0,rMEMP + addi rMEMP,rMEMP,128 + addi rLEN,rLEN,-128 + .align 4 +L(cacheAligned1): + cmplwi cr1,rLEN,32 + blt cr1,L(handletail32) + addi rMEMP3,rMEMP,32 + addi rLEN,rLEN,-32 + stw rCHR,0(rMEMP) + stw rCHR,4(rMEMP) + stw rCHR,8(rMEMP) + stw rCHR,12(rMEMP) + stw rCHR,16(rMEMP) + stw rCHR,20(rMEMP) + addi rMEMP,rMEMP,32 + cmplwi cr1,rLEN,32 + stw rCHR,-8(rMEMP3) + stw rCHR,-4(rMEMP3) +L(cacheAligned2): + blt cr1,L(handletail32) + addi rLEN,rLEN,-32 + stw rCHR,0(rMEMP3) + stw rCHR,4(rMEMP3) + stw rCHR,8(rMEMP3) + stw rCHR,12(rMEMP3) + addi rMEMP,rMEMP,32 + cmplwi cr1,rLEN,32 + stw rCHR,16(rMEMP3) + stw rCHR,20(rMEMP3) + stw rCHR,24(rMEMP3) + stw rCHR,28(rMEMP3) + nop +L(cacheAligned3): + blt cr1,L(handletail32) +/* At this point we can overrun the store queue (pipe reject) so it is + time to slow things down. The store queue can merge two adjacent + stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU. + So we add "group ending nops" to guarantee that we dispatch only two + stores every other cycle. */ + ori r1,r1,0 + ori r1,r1,0 + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,32(rMEMP3) + stw rCHR,36(rMEMP3) + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,40(rMEMP3) + stw rCHR,44(rMEMP3) + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,48(rMEMP3) + stw rCHR,52(rMEMP3) + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,56(rMEMP3) + stw rCHR,60(rMEMP3) + +/* We are here because the length or remainder (rLEN) is less than the + cache line/sector size and does not justify aggressive loop unrolling. + So set up the preconditions for L(medium) and go there. */ + .align 3 +L(handletail32): + cmplwi cr1,rLEN,0 + beqlr cr1 + b L(medium) + + .align 4 +L(small): +/* Memset of 4 bytes or less. */ + cmplwi cr5, rLEN, 1 + cmplwi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + cmplwi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt 29, L(medium_29t) +L(medium_29f): + bge cr1, L(medium_27t) + bflr 28 + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt cr1, L(medium_27f) +L(medium_27t): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stwu rCHR, -16(rMEMP) +L(medium_27f): + bflr 28 +L(medium_28t): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + blr +END (memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/multiarch/Implies new file mode 100644 index 0000000000..ff9f999749 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power5+/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/Implies new file mode 100644 index 0000000000..c0e1bea435 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power6/fpu +powerpc/powerpc32/power6 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/Implies new file mode 100644 index 0000000000..d53ce2573c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power6/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/multiarch/Implies new file mode 100644 index 0000000000..c66805ee63 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power5+/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S new file mode 100644 index 0000000000..cb780516b5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S @@ -0,0 +1,41 @@ +/* Round double to long int. POWER6x PowerPC32 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power6" +/* long int[r3] __lrint (double x[fp1]) */ +ENTRY (__lrint) + fctiw fp13,fp1 + mftgpr r3,fp13 + blr + END (__lrint) + +weak_alias (__lrint, lrint) + +strong_alias (__lrint, __lrintf) +weak_alias (__lrint, lrintf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S new file mode 100644 index 0000000000..05b13cd34c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S @@ -0,0 +1,51 @@ +/* lround function. POWER6x, PowerPC32 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long [r3] lround (float x [fp1]) + IEEE 1003.1 lround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we pre-round using the V2.02 Floating Round to Integer Nearest + instruction before we use the Floating Convert to Integer Word with + round to zero instruction. */ + + .machine "power6" +ENTRY (__lround) + frin fp2,fp1 /* Pre-round +-0.5. */ + fctiwz fp3,fp2 /* Convert To Integer Word lround toward 0. */ + mftgpr r3,fp3 /* Transfer fpr3 to r3. */ + blr + END (__lround) + +weak_alias (__lround, lround) + +strong_alias (__lround, __lroundf) +weak_alias (__lround, lroundf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/multiarch/Implies new file mode 100644 index 0000000000..ff9f999749 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power5+/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/Implies new file mode 100644 index 0000000000..c0e1bea435 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power6/fpu +powerpc/powerpc32/power6 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/Makefile new file mode 100644 index 0000000000..5e8f4a28ba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/Makefile @@ -0,0 +1,4 @@ +ifeq ($(subdir),string) +CFLAGS-strncase.c += -funroll-loops +CFLAGS-strncase_l.c += -funroll-loops +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/multiarch/Implies new file mode 100644 index 0000000000..45cbaede9f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power6/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S new file mode 100644 index 0000000000..da4efa0fb9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S @@ -0,0 +1,93 @@ +/* finite(). PowerPC32/POWER7 version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __finite(x) */ + .section .rodata.cst8,"aM",@progbits,8 + .align 3 +.LC0: /* 1.0 */ + .quad 0x3ff0000000000000 + + .section ".text" + .type __finite, @function + .machine power7 +ENTRY (__finite) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfd fp0,.LC0-got_label@l(r9) + + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfd fp0,.LC0@l(r9) +#endif + ftdiv cr7,fp1,fp0 + li r3,1 + bflr 30 + + /* We have -INF/+INF/NaN or a denormal. */ + + stwu r1,-16(r1) /* Allocate stack space. */ + stfd fp1,8(r1) /* Transfer FP to GPR's. */ + + ori 2,2,0 /* Force a new dispatch group. */ + lhz r0,8+HISHORT(r1) /* Fetch the upper 16 bits of the FP value + (biased exponent and sign bit). */ + clrlwi r0,r0,17 /* r0 = abs(r0). */ + addi r1,r1,16 /* Reset the stack pointer. */ + cmpwi cr7,r0,0x7ff0 /* r4 == 0x7ff0?. */ + bltlr cr7 /* LT means we have a denormal. */ + li r3,0 + blr + END (__finite) + +hidden_def (__finite) +weak_alias (__finite, finite) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__finite, __finitef) +hidden_def (__finitef) +weak_alias (__finitef, finitef) + +#ifdef NO_LONG_DOUBLE +strong_alias (__finite, __finitel) +weak_alias (__finite, finitel) +#endif + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, finite, finitel, GLIBC_2_0) +# endif +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1) +compat_symbol (libm, __finite, __finitel, GLIBC_2_1) +# endif +#else +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __finite, __finitel, GLIBC_2_0); +compat_symbol (libc, finite, finitel, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S new file mode 100644 index 0000000000..54bd94176d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S @@ -0,0 +1 @@ +/* This function uses the same code as s_finite.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S new file mode 100644 index 0000000000..668815761a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S @@ -0,0 +1,85 @@ +/* isinf(). PowerPC32/POWER7 version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isinf(x) */ + .section .rodata.cst8,"aM",@progbits,8 + .align 3 +.LC0: /* 1.0 */ + .quad 0x3ff0000000000000 + + .section ".text" + .type __isinf, @function + .machine power7 +ENTRY (__isinf) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfd fp0,.LC0-got_label@l(r9) + + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfd fp0,.LC0@l(r9) +#endif + ftdiv cr7,fp1,fp0 + li r3,0 + bflr 29 /* If not INF, return. */ + + /* Either we have +INF or -INF. */ + + stwu r1,-16(r1) /* Allocate stack space. */ + stfd fp1,8(r1) /* Transfer FP to GPR's. */ + ori 2,2,0 /* Force a new dispatch group. */ + lhz r4,8+HISHORT(r1) /* Fetch the upper 16 bits of the FP value + (biased exponent and sign bit). */ + addi r1,r1,16 /* Reset the stack pointer. */ + cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */ + li r3,1 + beqlr cr7 /* EQ means INF, otherwise -INF. */ + li r3,-1 + blr + END (__isinf) + +hidden_def (__isinf) +weak_alias (__isinf, isinf) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isinf, __isinff) +hidden_def (__isinff) +weak_alias (__isinff, isinff) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isinf, __isinfl) +weak_alias (__isinf, isinfl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0); +compat_symbol (libc, isinf, isinfl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S new file mode 100644 index 0000000000..be759e091e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isinf.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S new file mode 100644 index 0000000000..433137f1c4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S @@ -0,0 +1,90 @@ +/* isnan(). PowerPC32/POWER7 version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .section .rodata.cst8,"aM",@progbits,8 + .align 3 +.LC0: /* 1.0 */ + .quad 0x3ff0000000000000 + + .section ".text" + .type __isnan, @function + .machine power7 +ENTRY (__isnan) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfd fp0,.LC0-got_label@l(r9) + + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfd fp0,.LC0@l(r9) +#endif + ftdiv cr7,fp1,fp0 + li r3,0 + bflr 30 /* If not NaN or Inf, finish. */ + + /* We have -INF/+INF/NaN or a denormal. */ + + stwu r1,-16(r1) /* Allocate stack space. */ + stfd fp1,8(r1) /* Transfer FP to GPR's. */ + ori 2,2,0 /* Force a new dispatch group. */ + lwz r4,8+HIWORD(r1) /* Load the upper half of the FP value. */ + lwz r5,8+LOWORD(r1) /* Load the lower half of the FP value. */ + addi r1,r1,16 /* Reset the stack pointer. */ + lis r0,0x7ff0 /* Load the upper portion for an INF/NaN. */ + clrlwi r4,r4,1 /* r4 = abs(r4). */ + cmpw cr7,r4,r0 /* if (abs(r4) <= inf). */ + cmpwi cr6,r5,0 /* r5 == 0x00000000? */ + bltlr cr7 /* LT means we have a denormal. */ + bgt cr7,L(NaN) /* GT means we have a NaN. */ + beqlr cr6 /* EQ means we have +/-INF. */ +L(NaN): + li r3,1 /* x == NaN? */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S new file mode 100644 index 0000000000..b48c85e0d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isnan.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memchr.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memchr.S new file mode 100644 index 0000000000..9ce8507a82 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memchr.S @@ -0,0 +1,193 @@ +/* Optimized memchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memchr (char *s [r3], int byte [r4], int size [r5]) */ + .machine power7 +ENTRY (__memchr) + CALL_MCOUNT + dcbt 0,r3 + clrrwi r8,r3,2 + insrwi r4,r4,8,16 /* Replicate byte to word. */ + + /* Calculate the last acceptable address and check for possible + addition overflow by using satured math: + r7 = r3 + r5 + r7 |= -(r7 < x) */ + add r7,r3,r5 + subfc r6,r3,r7 + subfe r9,r9,r9 + or r7,r7,r9 + + insrwi r4,r4,16,0 + cmplwi r5,16 + li r9, -1 + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + addi r7,r7,-1 +#ifdef __LITTLE_ENDIAN__ + slw r9,r9,r6 +#else + srw r9,r9,r6 +#endif + ble L(small_range) + + lwz r12,0(r8) /* Load word from memory. */ + cmpb r3,r12,r4 /* Check for BYTEs in WORD1. */ + and r3,r3,r9 + clrlwi r5,r7,30 /* Byte count - 1 in last word. */ + clrrwi r7,r7,2 /* Address of last word. */ + cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + bt 29,L(loop_setup) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) + cmpb r3,r12,r4 + cmplwi cr7,r3,0 + bne cr7,L(done) + +L(loop_setup): + /* The last word we want to read in the loop below is the one + containing the last byte of the string, ie. the word at + (s + size - 1) & ~3, or r7. The first word read is at + r8 + 4, we read 2 * cnt words, so the last word read will + be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives + cnt = (r7 - r8) / 8 */ + sub r6,r7,r8 + srwi r6,r6,3 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. */ + + /* Main loop to look for BYTE in the string. Since + it's a small loop (8 instructions), align it to 32-bytes. */ + .align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + lwz r12,4(r8) + lwzu r11,8(r8) + cmpb r3,r12,r4 + cmpb r9,r11,r4 + or r6,r9,r3 /* Merge everything in one word. */ + cmplwi cr7,r6,0 + bne cr7,L(found) + bdnz L(loop) + + /* We may have one more dword to read. */ + cmplw r8,r7 + beqlr + + lwzu r12,4(r8) + cmpb r3,r12,r4 + cmplwi cr6,r3,0 + bne cr6,L(done) + blr + + .align 4 +L(found): + /* OK, one (or both) of the words contains BYTE. Check + the first word and decrement the address in case the first + word really contains BYTE. */ + cmplwi cr6,r3,0 + addi r8,r8,-4 + bne cr6,L(done) + + /* BYTE must be in the second word. Adjust the address + again and move the result of cmpb to r3 so we can calculate the + pointer. */ + + mr r3,r9 + addi r8,r8,4 + + /* r3 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as BYTE in the original + word from the string. Use that to calculate the pointer. + We need to make sure BYTE is *before* the end of the range. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r3,-1 + andc r0,r0,r3 + popcntw r0,r0 /* Count trailing zeros. */ +#else + cntlzw r0,r3 /* Count leading zeros before the match. */ +#endif + cmplw r8,r7 /* Are we on the last word? */ + srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r8,r0 + cmplw cr7,r0,r5 /* If on the last dword, check byte offset. */ + bnelr + blelr cr7 + li r3,0 + blr + + .align 4 +L(null): + li r3,0 + blr + +/* Deals with size <= 16. */ + .align 4 +L(small_range): + cmplwi r5,0 + beq L(null) + lwz r12,0(r8) /* Load word from memory. */ + cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ + and r3,r3,r9 + cmplwi cr7,r3,0 + clrlwi r5,r7,30 /* Byte count - 1 in last word. */ + clrrwi r7,r7,2 /* Address of last word. */ + cmplw r8,r7 /* Are we done already? */ + bne cr7,L(done) + beqlr + + lwzu r12,4(r8) + cmpb r3,r12,r4 + cmplwi cr6,r3,0 + cmplw r8,r7 + bne cr6,L(done) + beqlr + + lwzu r12,4(r8) + cmpb r3,r12,r4 + cmplwi cr6,r3,0 + cmplw r8,r7 + bne cr6,L(done) + beqlr + + lwzu r12,4(r8) + cmpb r3,r12,r4 + cmplwi cr6,r3,0 + cmplw r8,r7 + bne cr6,L(done) + beqlr + + lwzu r12,4(r8) + cmpb r3,r12,r4 + cmplwi cr6,r3,0 + bne cr6,L(done) + blr + +END (__memchr) +weak_alias (__memchr, memchr) +libc_hidden_builtin_def (memchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memcmp.S new file mode 100644 index 0000000000..09c9b9bf4d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memcmp.S @@ -0,0 +1,1375 @@ +/* Optimized memcmp implementation for POWER7/PowerPC32. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + + .machine power7 +EALIGN (memcmp, 4, 0) + CALL_MCOUNT + +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r8 /* next word in s1 */ +#define rWORD4 r9 /* next word in s2 */ +#define rWORD5 r10 /* next word in s1 */ +#define rWORD6 r11 /* next word in s2 */ +#define rWORD7 r30 /* next word in s1 */ +#define rWORD8 r31 /* next word in s2 */ + + xor r0, rSTR2, rSTR1 + cmplwi cr6, rN, 0 + cmplwi cr1, rN, 12 + clrlwi. r0, r0, 30 + clrlwi r12, rSTR1, 30 + cmplwi cr5, r12, 0 + beq- cr6, L(zeroLength) + dcbt 0, rSTR1 + dcbt 0, rSTR2 +/* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) + stwu 1, -64(r1) + cfi_adjust_cfa_offset(64) + stw rWORD8, 48(r1) + stw rWORD7, 44(r1) + cfi_offset(rWORD8, (48-64)) + cfi_offset(rWORD7, (44-64)) + bne L(unaligned) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. r12 contains the low order + 2 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then we are already word + aligned and can perform the word aligned loop. + + Otherwise we know the two strings have the same alignment (but not + yet word aligned). So we force the string addresses to the next lower + word boundary and special case this first word using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (word aligned) compare loop, starting at the second word, + we need to adjust the length (rN) and special case the loop + versioning for the first word. This ensures that the loop count is + correct and the first word (shifted) is in the expected register pair. */ + .align 4 +L(samealignment): + clrrwi rSTR1, rSTR1, 2 + clrrwi rSTR2, rSTR2, 2 + beq cr5, L(Waligned) + add rN, rN, r12 + slwi rWORD6, r12, 3 + srwi r0, rN, 4 /* Divide by 16 */ + andi. r12, rN, 12 /* Get the word remainder */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) +#endif + cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + clrlwi rN, rN, 30 + beq L(dPs4) + mtctr r0 + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + +/* Remainder is 4 */ + .align 3 +L(dsP1): + slw rWORD5, rWORD1, rWORD6 + slw rWORD6, rWORD2, rWORD6 + cmplw cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + b L(dP1e) +/* Remainder is 8 */ + .align 4 +L(dPs2): + slw rWORD5, rWORD1, rWORD6 + slw rWORD6, rWORD2, rWORD6 + cmplw cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 4(rSTR1) + lwz rWORD8, 4(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + b L(dP2e) +/* Remainder is 12 */ + .align 4 +L(dPs3): + slw rWORD3, rWORD1, rWORD6 + slw rWORD4, rWORD2, rWORD6 + cmplw cr1, rWORD3, rWORD4 + b L(dP3e) +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(dPs4): + mtctr r0 + slw rWORD1, rWORD1, rWORD6 + slw rWORD2, rWORD2, rWORD6 + cmplw cr7, rWORD1, rWORD2 + b L(dP4e) + +/* At this point we know both strings are word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(Waligned): + andi. r12, rN, 12 /* Get the word remainder */ + srwi r0, rN, 4 /* Divide by 16 */ + cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + clrlwi rN, rN, 30 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) + +/* Remainder is 4 */ + .align 4 +L(dP1): + mtctr r0 +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 0(rSTR1) + lwz rWORD6, 0(rSTR2) +#endif + cmplw cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 +L(dP1e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5x) + bne cr7, L(dLcr7x) + +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 16(rSTR1) + lwzu rWORD8, 16(rSTR2) +#endif + bne cr1, L(dLcr1) + cmplw cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) + .align 3 +L(dP1x): + slwi. r12, rN, 3 + bne cr5, L(dLcr5x) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ + addi r1, r1, 64 + cfi_adjust_cfa_offset(-64) + bne L(d00) + li rRTN, 0 + blr + +/* Remainder is 8 */ + .align 4 + cfi_adjust_cfa_offset(64) +L(dP2): + mtctr r0 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 0(rSTR1) + lwz rWORD6, 0(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 4(rSTR1) + lwz rWORD8, 4(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 +L(dP2e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 8(rSTR1) + lwz rWORD2, 8(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 12(rSTR1) + lwz rWORD4, 12(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#endif + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) +/* Again we are on a early exit path (16-23 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP2x): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 4(rSTR1) + lwz rWORD4, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + slwi. r12, rN, 3 + bne cr6, L(dLcr6x) +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#endif + bne cr1, L(dLcr1x) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ + addi r1, r1, 64 + cfi_adjust_cfa_offset(-64) + bne L(d00) + li rRTN, 0 + blr + +/* Remainder is 12 */ + .align 4 + cfi_adjust_cfa_offset(64) +L(dP3): + mtctr r0 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 0(rSTR1) + lwz rWORD4, 0(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 +L(dP3e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 4(rSTR1) + lwz rWORD6, 4(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 8(rSTR1) + lwz rWORD8, 8(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 12(rSTR1) + lwz rWORD2, 12(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) +/* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP3x): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 8(rSTR1) + lwz rWORD2, 8(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + slwi. r12, rN, 3 + bne cr1, L(dLcr1x) +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr6, L(dLcr6x) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ + bne cr7, L(dLcr7x) + addi r1, r1, 64 + cfi_adjust_cfa_offset(-64) + bne L(d00) + li rRTN, 0 + blr + +/* Count is a multiple of 16, remainder is 0 */ + .align 4 + cfi_adjust_cfa_offset(64) +L(dP4): + mtctr r0 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 +L(dP4e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 4(rSTR1) + lwz rWORD4, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 8(rSTR1) + lwz rWORD6, 8(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 12(rSTR1) + lwzu rWORD8, 12(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(dLoop): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +L(dLoop1): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) +L(dLoop2): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) +L(dLoop3): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 16(rSTR1) + lwzu rWORD8, 16(rSTR2) +#endif + bne cr1, L(dLcr1) + cmplw cr7, rWORD1, rWORD2 + bdnz L(dLoop) + +L(dL4): + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + cmplw cr5, rWORD7, rWORD8 +L(d44): + bne cr7, L(dLcr7) +L(d34): + bne cr1, L(dLcr1) +L(d24): + bne cr6, L(dLcr6) +L(d14): + slwi. r12, rN, 3 + bne cr5, L(dLcr5) +L(d04): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) + addi r1, r1, 64 + cfi_adjust_cfa_offset(-64) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ + beq L(zeroLength) +/* At this point we have a remainder of 1 to 3 bytes to compare. Since + we are aligned it is safe to load the whole word, and use + shift right to eliminate bits beyond the compare length. */ +L(d00): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + srw rWORD1, rWORD1, rN + srw rWORD2, rWORD2, rN + sub rRTN, rWORD1, rWORD2 + blr + + .align 4 + cfi_adjust_cfa_offset(64) +L(dLcr7): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) +L(dLcr7x): + li rRTN, 1 + addi r1, r1, 64 + cfi_adjust_cfa_offset(-64) + bgtlr cr7 + li rRTN, -1 + blr + .align 4 + cfi_adjust_cfa_offset(64) +L(dLcr1): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) +L(dLcr1x): + li rRTN, 1 + addi r1, r1, 64 + cfi_adjust_cfa_offset(-64) + bgtlr cr1 + li rRTN, -1 + blr + .align 4 + cfi_adjust_cfa_offset(64) +L(dLcr6): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) +L(dLcr6x): + li rRTN, 1 + addi r1, r1, 64 + cfi_adjust_cfa_offset(-64) + bgtlr cr6 + li rRTN, -1 + blr + .align 4 + cfi_adjust_cfa_offset(64) +L(dLcr5): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) +L(dLcr5x): + li rRTN, 1 + addi r1, r1, 64 + cfi_adjust_cfa_offset(-64) + bgtlr cr5 + li rRTN, -1 + blr + + .align 4 +L(bytealigned): + mtctr rN + +/* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) is 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. Effectively + the first iteration of the loop only serves to load operands and + branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. */ + + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz L(b11) + cmplw cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz L(b12) + cmplw cr1, rWORD3, rWORD4 + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz L(b13) + .align 4 +L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + bne cr7, L(bLcr7) + + cmplw cr6, rWORD5, rWORD6 + bdz L(b3i) + + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + bne cr1, L(bLcr1) + + cmplw cr7, rWORD1, rWORD2 + bdz L(b2i) + + lbzu rWORD5, 1(rSTR1) + lbzu rWORD6, 1(rSTR2) + bne cr6, L(bLcr6) + + cmplw cr1, rWORD3, rWORD4 + bdnz L(bLoop) + +/* We speculatively loading bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. In this + case the loop will exit early (before the all pending bytes are + tested. In this case we must complete the pending operations + before returning. */ +L(b1i): + bne cr7, L(bLcr7) + bne cr1, L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne cr6, L(bLcr6) + bne cr7, L(bLcr7) + b L(bx34) + .align 4 +L(b3i): + bne cr1, L(bLcr1) + bne cr6, L(bLcr6) + b L(bx12) + .align 4 +L(bLcr7): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr +L(bLcr1): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr +L(bLcr6): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + +L(b13): + bne cr7, L(bx12) + bne cr1, L(bx34) +L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop +L(b12): + bne cr7, L(bx12) +L(bx34): + sub rRTN, rWORD3, rWORD4 + blr +L(b11): +L(bx12): + sub rRTN, rWORD1, rWORD2 + blr + .align 4 +L(zeroLength): + li rRTN, 0 + blr + + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. r12 contains the low order + 2 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then rStr1 is word aligned and can + perform the Wunaligned loop. + + Otherwise we know that rSTR1 is not already word aligned yet. + So we can force the string addresses to the next lower word + boundary and special case this first word using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (Wualigned) compare loop, starting at the second word, + we need to adjust the length (rN) and special case the loop + versioning for the first W. This ensures that the loop count is + correct and the first W (shifted) is in the expected resister pair. */ +#define rSHL r29 /* Unaligned shift left count. */ +#define rSHR r28 /* Unaligned shift right count. */ +#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ +#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ +#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ +#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */ + cfi_adjust_cfa_offset(64) +L(unaligned): + stw rSHL, 40(r1) + cfi_offset(rSHL, (40-64)) + clrlwi rSHL, rSTR2, 30 + stw rSHR, 36(r1) + cfi_offset(rSHR, (36-64)) + beq cr5, L(Wunaligned) + stw rWORD8_SHIFT, 32(r1) + cfi_offset(rWORD8_SHIFT, (32-64)) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 W. */ + sub rWORD8_SHIFT, rSTR2, r12 +/* But do not attempt to address the W before that W that contains + the actual start of rSTR2. */ + clrrwi rSTR2, rSTR2, 2 + stw rWORD2_SHIFT, 28(r1) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (W aligned) start of rSTR1. */ + clrlwi rSHL, rWORD8_SHIFT, 30 + clrrwi rSTR1, rSTR1, 2 + stw rWORD4_SHIFT, 24(r1) + slwi rSHL, rSHL, 3 + cmplw cr5, rWORD8_SHIFT, rSTR2 + add rN, rN, r12 + slwi rWORD6, r12, 3 + stw rWORD6_SHIFT, 20(r1) + cfi_offset(rWORD2_SHIFT, (28-64)) + cfi_offset(rWORD4_SHIFT, (24-64)) + cfi_offset(rWORD6_SHIFT, (20-64)) + subfic rSHR, rSHL, 32 + srwi r0, rN, 4 /* Divide by 16 */ + andi. r12, rN, 12 /* Get the W remainder */ +/* We normally need to load 2 Ws to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a W where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. */ + li rWORD8, 0 + blt cr5, L(dus0) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD8, 0(rSTR2) + addi rSTR2, rSTR2, 4 +#endif + slw rWORD8, rWORD8, rSHL + +L(dus0): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) +#endif + cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + srw r12, rWORD2, rSHR + clrlwi rN, rN, 30 + beq L(duPs4) + mtctr r0 + or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + +/* Remainder is 4 */ + .align 4 +L(dusP1): + slw rWORD8_SHIFT, rWORD2, rSHL + slw rWORD7, rWORD1, rWORD6 + slw rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) +/* At this point we exit early with the first word compare + complete and remainder of 0 to 3 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmplw cr5, rWORD7, rWORD8 + slwi. rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 4(rSTR2) +#endif + srw r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 8 */ + .align 4 +L(duPs2): + slw rWORD6_SHIFT, rWORD2, rSHL + slw rWORD5, rWORD1, rWORD6 + slw rWORD6, rWORD8, rWORD6 + b L(duP2e) +/* Remainder is 12 */ + .align 4 +L(duPs3): + slw rWORD4_SHIFT, rWORD2, rSHL + slw rWORD3, rWORD1, rWORD6 + slw rWORD4, rWORD8, rWORD6 + b L(duP3e) +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(duPs4): + mtctr r0 + or rWORD8, r12, rWORD8 + slw rWORD2_SHIFT, rWORD2, rSHL + slw rWORD1, rWORD1, rWORD6 + slw rWORD2, rWORD8, rWORD6 + b L(duP4e) + +/* At this point we know rSTR1 is word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(Wunaligned): + stw rWORD8_SHIFT, 32(r1) + clrrwi rSTR2, rSTR2, 2 + stw rWORD2_SHIFT, 28(r1) + srwi r0, rN, 4 /* Divide by 16 */ + stw rWORD4_SHIFT, 24(r1) + andi. r12, rN, 12 /* Get the W remainder */ + stw rWORD6_SHIFT, 20(r1) + cfi_offset(rWORD8_SHIFT, (32-64)) + cfi_offset(rWORD2_SHIFT, (28-64)) + cfi_offset(rWORD4_SHIFT, (24-64)) + cfi_offset(rWORD6_SHIFT, (20-64)) + slwi rSHL, rSHL, 3 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD6, 0, rSTR2 + addi rSTR2, rSTR2, 4 + lwbrx rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD6, 0(rSTR2) + lwzu rWORD8, 4(rSTR2) +#endif + cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + clrlwi rN, rN, 30 + subfic rSHR, rSHL, 32 + slw rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) + mtctr r0 + bgt cr1, L(duP3) + beq cr1, L(duP2) + +/* Remainder is 4 */ + .align 4 +L(duP1): + srw r12, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + addi rSTR1, rSTR1, 4 +#else + lwz rWORD7, 0(rSTR1) +#endif + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) +L(duP1e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + srw r0, rWORD2, rSHR + slw rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + srw r12, rWORD4, rSHR + slw rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) + or rWORD4, r12, rWORD2_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + srw r0, rWORD6, rSHR + slw rWORD6_SHIFT, rWORD6, rSHL + bne cr7, L(duLcr7) + or rWORD6, r0, rWORD4_SHIFT + cmplw cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first word compare + complete and remainder of 0 to 3 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmplw cr5, rWORD7, rWORD8 + slwi. rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 8(rSTR2) +#endif + srw r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 8 */ + .align 4 +L(duP2): + srw r0, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + addi rSTR1, rSTR1, 4 +#else + lwz rWORD5, 0(rSTR1) +#endif + or rWORD6, r0, rWORD6_SHIFT + slw rWORD6_SHIFT, rWORD8, rSHL +L(duP2e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 4(rSTR1) + lwz rWORD8, 4(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + srw r12, rWORD8, rSHR + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 8(rSTR1) + lwz rWORD2, 8(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srw r0, rWORD2, rSHR + slw rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 12(rSTR1) + lwz rWORD4, 12(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) + srw r12, rWORD4, rSHR + slw rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#endif + cmplw cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmplw cr5, rWORD7, rWORD8 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#endif + bne cr6, L(duLcr6) + slwi. rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 4(rSTR2) +#endif + srw r0, rWORD2, rSHR + b L(dutrim) + +/* Remainder is 12 */ + .align 4 +L(duP3): + srw r12, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + addi rSTR1, rSTR1, 4 +#else + lwz rWORD3, 0(rSTR1) +#endif + slw rWORD4_SHIFT, rWORD8, rSHL + or rWORD4, r12, rWORD6_SHIFT +L(duP3e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 4(rSTR1) + lwz rWORD6, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + srw r0, rWORD6, rSHR + slw rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 8(rSTR1) + lwz rWORD8, 8(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srw r12, rWORD8, rSHR + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 12(rSTR1) + lwz rWORD2, 12(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srw r0, rWORD2, rSHR + slw rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + cmplw cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif +#if 0 +/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + slwi. rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 4(rSTR2) +#endif + srw r0, rWORD2, rSHR + b L(dutrim) + +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(duP4): + mtctr r0 + srw r0, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + addi rSTR1, rSTR1, 4 +#else + lwz rWORD1, 0(rSTR1) +#endif + slw rWORD2_SHIFT, rWORD8, rSHL + or rWORD2, r0, rWORD6_SHIFT +L(duP4e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 4(rSTR1) + lwz rWORD4, 4(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + srw r12, rWORD4, rSHR + slw rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 8(rSTR1) + lwz rWORD6, 8(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + bne cr7, L(duLcr7) + srw r0, rWORD6, rSHR + slw rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 12(rSTR1) + lwzu rWORD8, 12(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srw r12, rWORD8, rSHR + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + cmplw cr5, rWORD7, rWORD8 + bdz L(du24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(duLoop): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + srw r0, rWORD2, rSHR + slw rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +L(duLoop1): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + srw r12, rWORD4, rSHR + slw rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +L(duLoop2): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr7, L(duLcr7) + srw r0, rWORD6, rSHR + slw rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +L(duLoop3): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 16(rSTR1) + lwzu rWORD8, 16(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + bne cr1, L(duLcr1) + srw r12, rWORD8, rSHR + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + bdnz L(duLoop) + +L(duL4): +#if 0 +/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) +#endif + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmplw cr5, rWORD7, rWORD8 +L(du44): + bne cr7, L(duLcr7) +L(du34): + bne cr1, L(duLcr1) +L(du24): + bne cr6, L(duLcr6) +L(du14): + slwi. rN, rN, 3 + bne cr5, L(duLcr5) +/* At this point we have a remainder of 1 to 3 bytes to compare. We use + shift right to eliminate bits beyond the compare length. + This allows the use of word subtract to compute the final result. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rWORD8_SHIFT). */ + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 4(rSTR2) +#endif + srw r0, rWORD2, rSHR + .align 4 +L(dutrim): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 +#else + lwz rWORD1, 4(rSTR1) +#endif + lwz rWORD8, 48(r1) + subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). */ + or rWORD2, r0, rWORD8_SHIFT + lwz rWORD7, 44(r1) + lwz rSHL, 40(r1) + srw rWORD1, rWORD1, rN + srw rWORD2, rWORD2, rN + lwz rSHR, 36(r1) + lwz rWORD8_SHIFT, 32(r1) + sub rRTN, rWORD1, rWORD2 + b L(dureturn26) + .align 4 +L(duLcr7): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr7, L(dureturn29) + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr1): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr6): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr5): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 3 +L(duZeroReturn): + li rRTN, 0 + .align 4 +L(dureturn): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) +L(dureturn29): + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) +L(dureturn27): + lwz rWORD8_SHIFT, 32(r1) +L(dureturn26): + lwz rWORD2_SHIFT, 28(r1) +L(dureturn25): + lwz rWORD4_SHIFT, 24(r1) + lwz rWORD6_SHIFT, 20(r1) + addi r1, r1, 64 + cfi_adjust_cfa_offset(-64) + blr +END (memcmp) + +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp, bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memcpy.S new file mode 100644 index 0000000000..8e33c1d733 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memcpy.S @@ -0,0 +1,538 @@ +/* Optimized memcpy implementation for PowerPC32/POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. */ + + .machine power7 +EALIGN (memcpy, 5, 0) + CALL_MCOUNT + + stwu 1,-32(1) + cfi_adjust_cfa_offset(32) + stw 30,20(1) + cfi_offset(30,(20-32)) + stw 31,24(1) + mr 30,3 + cmplwi cr1,5,31 + neg 0,3 + cfi_offset(31,-8) + ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + + andi. 11,3,15 /* Check alignment of DST. */ + clrlwi 10,4,28 /* Check alignment of SRC. */ + cmplw cr6,10,11 /* SRC and DST alignments match? */ + mr 12,4 + mr 31,5 + bne cr6,L(copy_GE_32_unaligned) + + srwi 9,5,3 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_aligned_cont) + + clrlwi 0,0,29 + mtcrf 0x01,0 + subf 31,0,5 + + /* Get the SRC aligned to 8 bytes. */ + +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,4f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: bf 29,0f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +0: + clrlwi 10,12,29 /* Check alignment of SRC again. */ + srwi 9,31,3 /* Number of full doublewords remaining. */ + +L(copy_GE_32_aligned_cont): + + clrlwi 11,31,29 + mtcrf 0x01,9 + + srwi 8,31,5 + cmplwi cr1,9,4 + cmplwi cr6,11,0 + mr 11,12 + + /* Copy 1~3 doublewords so the main loop starts + at a multiple of 32 bytes. */ + + bf 30,1f + lfd 6,0(12) + lfd 7,8(12) + addi 11,12,16 + mtctr 8 + stfd 6,0(3) + stfd 7,8(3) + addi 10,3,16 + bf 31,4f + lfd 0,16(12) + stfd 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 + b 4f + + .align 4 +1: /* Copy 1 doubleword and set the counter. */ + mr 10,3 + mtctr 8 + bf 31,4f + lfd 6,0(12) + addi 11,12,8 + stfd 6,0(3) + addi 10,3,8 + +L(aligned_copy): + /* Main aligned copy loop. Copies up to 128-bytes at a time. */ + .align 4 +4: + /* check for any 32-byte or 64-byte lumps that are outside of a + nice 128-byte range. R8 contains the number of 32-byte + lumps, so drop this into the CR, and use the SO/EQ bits to help + handle the 32- or 64- byte lumps. Then handle the rest with an + unrolled 128-bytes-at-a-time copy loop. */ + mtocrf 1,8 + li 6,16 # 16() index + li 7,32 # 32() index + li 8,48 # 48() index + +L(aligned_32byte): + /* if the SO bit (indicating a 32-byte lump) is not set, move along. */ + bns cr7,L(aligned_64byte) + lxvd2x 6,0,11 + lxvd2x 7,11,6 + addi 11,11,32 + stxvd2x 6,0,10 + stxvd2x 7,10,6 + addi 10,10,32 + +L(aligned_64byte): + /* if the EQ bit (indicating a 64-byte lump) is not set, move along. */ + bne cr7,L(aligned_128setup) + lxvd2x 6,0,11 + lxvd2x 7,11,6 + lxvd2x 8,11,7 + lxvd2x 9,11,8 + addi 11,11,64 + stxvd2x 6,0,10 + stxvd2x 7,10,6 + stxvd2x 8,10,7 + stxvd2x 9,10,8 + addi 10,10,64 + +L(aligned_128setup): + /* Set up for the 128-byte at a time copy loop. */ + srwi 8,31,7 + cmpwi 8,0 # Any 4x lumps left? + beq 3f # if not, move along. + lxvd2x 6,0,11 + lxvd2x 7,11,6 + mtctr 8 # otherwise, load the ctr and begin. + li 8,48 # 48() index + b L(aligned_128loop) + +L(aligned_128head): + /* for the 2nd + iteration of this loop. */ + lxvd2x 6,0,11 + lxvd2x 7,11,6 +L(aligned_128loop): + lxvd2x 8,11,7 + lxvd2x 9,11,8 + stxvd2x 6,0,10 + addi 11,11,64 + stxvd2x 7,10,6 + stxvd2x 8,10,7 + stxvd2x 9,10,8 + lxvd2x 6,0,11 + lxvd2x 7,11,6 + addi 10,10,64 + lxvd2x 8,11,7 + lxvd2x 9,11,8 + addi 11,11,64 + stxvd2x 6,0,10 + stxvd2x 7,10,6 + stxvd2x 8,10,7 + stxvd2x 9,10,8 + addi 10,10,64 + bdnz L(aligned_128head) + +3: + /* Check for tail bytes. */ + clrrwi 0,31,3 + mtcrf 0x01,31 + beq cr6,0f + +.L9: + add 3,3,0 + add 12,12,0 + + /* At this point we have a tail of 0-7 bytes and we know that the + destination is doubleword-aligned. */ +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + + /* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + cmplwi cr6,5,8 + mr 12,4 + mtcrf 0x01,5 + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + clrrwi 11,4,2 + andi. 0,8,3 + cmplwi cr1,5,16 + mr 10,5 + beq L(copy_LT_32_aligned) + + /* Force 4-bytes alignment for SRC. */ + mtocrf 0x01,0 + subf 10,0,5 +2: bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,L(end_4bytes_alignment) + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 + + .align 4 +L(end_4bytes_alignment): + cmplwi cr1,10,16 + mtcrf 0x01,10 + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 8,8(12) + stw 7,4(3) + lwz 6,12(12) + addi 12,12,16 + stw 8,8(3) + stw 6,12(3) + addi 3,3,16 +8: /* Copy 8 bytes. */ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2-3 bytes. */ + bf 30,1f + + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + + /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + + /* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,4f + + /* Though we could've used lfd/stfd here, they are still + slow for unaligned cases. */ + + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + + /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +4: /* Copies 4~7 bytes. */ + bf 29,2b + + lwz 6,0(4) + stw 6,0(3) + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + + /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +5: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,4(4) + stb 6,4(3) + +0: /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + + /* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + andi. 11,3,15 /* Check alignment of DST. */ + clrlwi 0,0,28 /* Number of bytes until the 1st + quadword of DST. */ + srwi 9,5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* DST is not quadword aligned, get it aligned. */ + + mtcrf 0x01,0 + subf 31,0,5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: /* Copy 1 byte. */ + bf 31,2f + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: /* Copy 2 bytes. */ + bf 30,4f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: /* Copy 4 bytes. */ + bf 29,8f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +8: /* Copy 8 bytes. */ + bf 28,0f + + lfd 6,0(12) + addi 12,12,8 + stfd 6,0(3) + addi 3,3,8 +0: + clrlwi 10,12,28 /* Check alignment of SRC. */ + srwi 9,31,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrlwi 11,31,28 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmplwi cr1,11,0 + srwi 8,31,5 /* Setup the loop counter. */ + mr 10,3 + mr 11,12 + mtcrf 0x01,9 + cmplwi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,12 +#else + lvsl 5,0,12 +#endif + lvx 3,0,12 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop . */ + lvx 4,12,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi 11,12,16 + addi 10,3,16 + stvx 6,0,3 + vor 3,4,4 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,11,6 /* vr4 = r11+16. */ +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,11,7 /* vr3 = r11+32. */ +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi 11,11,32 + stvx 6,0,10 + stvx 10,10,6 + addi 10,10,32 + + bdnz L(unaligned_loop) + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + clrrwi 0,31,4 + mtcrf 0x01,31 + beq cr1,0f + + add 3,3,0 + add 12,12,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ +8: /* Copy 8 bytes. */ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2~3 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + +END (memcpy) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/mempcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/mempcpy.S new file mode 100644 index 0000000000..1682fbcd2a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/mempcpy.S @@ -0,0 +1,482 @@ +/* Optimized mempcpy implementation for POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] __mempcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst' + 'len'. */ + + .machine power7 +EALIGN (__mempcpy, 5, 0) + CALL_MCOUNT + + stwu 1,-32(1) + cfi_adjust_cfa_offset(32) + stw 30,20(1) + cfi_offset(30,(20-32)) + stw 31,24(1) + mr 30,3 + cmplwi cr1,5,31 + neg 0,3 + cfi_offset(31,-8) + ble cr1,L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + + andi. 11,3,7 /* Check alignment of DST. */ + clrlwi 10,4,29 /* Check alignment of SRC. */ + cmplw cr6,10,11 /* SRC and DST alignments match? */ + mr 12,4 + mr 31,5 + bne cr6,L(copy_GE_32_unaligned) + + srwi 9,5,3 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_aligned_cont) + + clrlwi 0,0,29 + mtcrf 0x01,0 + subf 31,0,5 + + /* Get the SRC aligned to 8 bytes. */ + +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,4f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: bf 29,0f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +0: + clrlwi 10,12,29 /* Check alignment of SRC again. */ + srwi 9,31,3 /* Number of full doublewords remaining. */ + +L(copy_GE_32_aligned_cont): + + clrlwi 11,31,29 + mtcrf 0x01,9 + + srwi 8,31,5 + cmplwi cr1,9,4 + cmplwi cr6,11,0 + mr 11,12 + + /* Copy 1~3 doublewords so the main loop starts + at a multiple of 32 bytes. */ + + bf 30,1f + lfd 6,0(12) + lfd 7,8(12) + addi 11,12,16 + mtctr 8 + stfd 6,0(3) + stfd 7,8(3) + addi 10,3,16 + bf 31,4f + lfd 0,16(12) + stfd 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 + b 4f + + .align 4 +1: /* Copy 1 doubleword and set the counter. */ + mr 10,3 + mtctr 8 + bf 31,4f + lfd 6,0(12) + addi 11,12,8 + stfd 6,0(3) + addi 10,3,8 + + .align 4 +4: /* Main aligned copy loop. Copies 32-bytes at a time. */ + lfd 6,0(11) + lfd 7,8(11) + lfd 8,16(11) + lfd 0,24(11) + addi 11,11,32 + + stfd 6,0(10) + stfd 7,8(10) + stfd 8,16(10) + stfd 0,24(10) + addi 10,10,32 + bdnz 4b +3: + + /* Check for tail bytes. */ + + clrrwi 0,31,3 + mtcrf 0x01,31 + beq cr6,0f + +.L9: + add 3,3,0 + add 12,12,0 + + /* At this point we have a tail of 0-7 bytes and we know that the + destination is doubleword-aligned. */ +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + + /* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + cmplwi cr6,5,8 + mr 12,4 + mtcrf 0x01,5 + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + clrrwi 11,4,2 + andi. 0,8,3 + cmplwi cr1,5,16 + mr 10,5 + beq L(copy_LT_32_aligned) + + /* Force 4-bytes alignment for SRC. */ + mtocrf 0x01,0 + subf 10,0,5 +2: bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,L(end_4bytes_alignment) + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 + + .align 4 +L(end_4bytes_alignment): + cmplwi cr1,10,16 + mtcrf 0x01,10 + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 8,8(12) + stw 7,4(3) + lwz 6,12(12) + addi 12,12,16 + stw 8,8(3) + stw 6,12(3) + addi 3,3,16 +8: /* Copy 8 bytes. */ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2-3 bytes. */ + bf 30,1f + + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + + /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + addi 1,1,32 + blr + + /* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,4f + + /* Though we could've used lfd/stfd here, they are still + slow for unaligned cases. */ + + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + + /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +4: /* Copies 4~7 bytes. */ + bf 29,2b + + lwz 6,0(4) + stw 6,0(3) + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + + /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +5: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,4(4) + stb 6,4(3) + +0: /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + addi 1,1,32 + blr + + /* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + andi. 11,3,15 /* Check alignment of DST. */ + clrlwi 0,0,28 /* Number of bytes until the 1st + quadword of DST. */ + srwi 9,5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* DST is not quadword aligned, get it aligned. */ + + mtcrf 0x01,0 + subf 31,0,5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: /* Copy 1 byte. */ + bf 31,2f + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: /* Copy 2 bytes. */ + bf 30,4f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: /* Copy 4 bytes. */ + bf 29,8f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +8: /* Copy 8 bytes. */ + bf 28,0f + + lfd 6,0(12) + addi 12,12,8 + stfd 6,0(3) + addi 3,3,8 +0: + clrlwi 10,12,28 /* Check alignment of SRC. */ + srwi 9,31,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrlwi 11,31,28 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmplwi cr1,11,0 + srwi 8,31,5 /* Setup the loop counter. */ + mr 10,3 + mr 11,12 + mtcrf 0x01,9 + cmplwi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,12 +#else + lvsl 5,0,12 +#endif + lvx 3,0,12 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop . */ + lvx 4,12,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi 11,12,16 + addi 10,3,16 + stvx 6,0,3 + vor 3,4,4 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,11,6 /* vr4 = r11+16. */ +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,11,7 /* vr3 = r11+32. */ +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi 11,11,32 + stvx 6,0,10 + stvx 10,10,6 + addi 10,10,32 + + bdnz L(unaligned_loop) + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + clrrwi 0,31,4 + mtcrf 0x01,31 + beq cr1,0f + + add 3,3,0 + add 12,12,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ +8: /* Copy 8 bytes. */ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2~3 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + +END (__mempcpy) +libc_hidden_def (__mempcpy) +weak_alias (__mempcpy, mempcpy) +libc_hidden_builtin_def (mempcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memrchr.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memrchr.S new file mode 100644 index 0000000000..eb0c1bb8eb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memrchr.S @@ -0,0 +1,196 @@ +/* Optimized memrchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5]) */ + .machine power7 +ENTRY (__memrchr) + CALL_MCOUNT + add r7,r3,r5 /* Calculate the last acceptable address. */ + neg r0,r7 + addi r7,r7,-1 + mr r10,r3 + clrrwi r6,r7,7 + li r9,3<<5 + dcbt r9,r6,16 /* Stream hint, decreasing addresses. */ + + /* Replicate BYTE to word. */ + insrwi r4,r4,8,16 + insrwi r4,r4,16,0 + li r6,-4 + li r9,-1 + rlwinm r0,r0,3,27,28 /* Calculate padding. */ + clrrwi r8,r7,2 + srw r9,r9,r0 + cmplwi r5,16 + clrrwi r0,r10,2 + ble L(small_range) + +#ifdef __LITTLE_ENDIAN__ + lwzx r12,0,r8 +#else + lwbrx r12,0,r8 /* Load reversed word from memory. */ +#endif + cmpb r3,r12,r4 /* Check for BYTE in WORD1. */ + and r3,r3,r9 + cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + bf 29,L(loop_setup) + + /* Handle WORD2 of pair. */ +#ifdef __LITTLE_ENDIAN__ + lwzx r12,r8,r6 +#else + lwbrx r12,r8,r6 +#endif + addi r8,r8,-4 + cmpb r3,r12,r4 + cmplwi cr7,r3,0 + bne cr7,L(done) + +L(loop_setup): + /* The last word we want to read in the loop below is the one + containing the first byte of the string, ie. the word at + s & ~3, or r0. The first word read is at r8 - 4, we + read 2 * cnt words, so the last word read will be at + r8 - 4 - 8 * cnt + 4. Solving for cnt gives + cnt = (r8 - r0) / 8 */ + sub r5,r8,r0 + addi r8,r8,-4 + srwi r9,r5,3 /* Number of loop iterations. */ + mtctr r9 /* Setup the counter. */ + + /* Main loop to look for BYTE backwards in the string. + FIXME: Investigate whether 32 byte align helps with this + 9 instruction loop. */ + .align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + +#ifdef __LITTLE_ENDIAN__ + lwzx r12,0,r8 + lwzx r11,r8,r6 +#else + lwbrx r12,0,r8 + lwbrx r11,r8,r6 +#endif + cmpb r3,r12,r4 + cmpb r9,r11,r4 + or r5,r9,r3 /* Merge everything in one word. */ + cmplwi cr7,r5,0 + bne cr7,L(found) + addi r8,r8,-8 + bdnz L(loop) + + /* We may have one more word to read. */ + cmplw r8,r0 + bnelr + +#ifdef __LITTLE_ENDIAN__ + lwzx r12,0,r8 +#else + lwbrx r12,0,r8 +#endif + cmpb r3,r12,r4 + cmplwi cr7,r3,0 + bne cr7,L(done) + blr + + .align 4 +L(found): + /* OK, one (or both) of the words contains BYTE. Check + the first word. */ + cmplwi cr6,r3,0 + bne cr6,L(done) + + /* BYTE must be in the second word. Adjust the address + again and move the result of cmpb to r3 so we can calculate the + pointer. */ + + mr r3,r9 + addi r8,r8,-4 + + /* r3 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as BYTE in the original + word from the string. Use that to calculate the pointer. + We need to make sure BYTE is *before* the end of the + range. */ +L(done): + cntlzw r9,r3 /* Count leading zeros before the match. */ + cmplw r8,r0 /* Are we on the last word? */ + srwi r6,r9,3 /* Convert leading zeros to bytes. */ + addi r0,r6,-3 + sub r3,r8,r0 + cmplw cr7,r3,r10 + bnelr + bgelr cr7 + li r3,0 + blr + + .align 4 +L(null): + li r3,0 + blr + +/* Deals with size <= 16. */ + .align 4 +L(small_range): + cmplwi r5,0 + beq L(null) + +#ifdef __LITTLE_ENDIAN__ + lwzx r12,0,r8 +#else + lwbrx r12,0,r8 /* Load reversed word from memory. */ +#endif + cmpb r3,r12,r4 /* Check for BYTE in WORD1. */ + and r3,r3,r9 + cmplwi cr7,r3,0 + bne cr7,L(done) + + /* Are we done already? */ + cmplw r8,r0 + addi r8,r8,-4 + beqlr + + .align 5 +L(loop_small): +#ifdef __LITTLE_ENDIAN__ + lwzx r12,0,r8 +#else + lwbrx r12,0,r8 +#endif + cmpb r3,r12,r4 + cmplw r8,r0 + cmplwi cr7,r3,0 + bne cr7,L(done) + addi r8,r8,-4 + bne L(loop_small) + blr + +END (__memrchr) +weak_alias (__memrchr, memrchr) +libc_hidden_builtin_def (memrchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memset.S new file mode 100644 index 0000000000..b431f5086d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memset.S @@ -0,0 +1,431 @@ +/* Optimized memset implementation for PowerPC32/POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. */ + + .machine power7 +EALIGN (memset, 5, 0) + CALL_MCOUNT + + .align 4 +L(_memset): + cmplwi cr7,5,31 + cmplwi cr6,5,8 + mr 10,3 /* Save original argument for later. */ + mr 7,1 /* Save original r1 for later. */ + cfi_offset(31,-8) + + /* Replicate byte to word. */ + insrwi 4,4,8,16 + insrwi 4,4,16,0 + + ble cr6,L(small) /* If length <= 8, use short copy code. */ + + neg 0,3 + ble cr7,L(medium) /* If length < 32, use medium copy code. */ + + /* Save our word twice to create a doubleword that we will later + copy to a FPR. */ + stwu 1,-32(1) + andi. 11,10,7 /* Check alignment of DST. */ + mr 12,5 + stw 4,24(1) + stw 4,28(1) + beq L(big_aligned) + + clrlwi 0,0,29 + mtocrf 0x01,0 + subf 5,0,5 + + /* Get DST aligned to 8 bytes. */ +1: bf 31,2f + + stb 4,0(10) + addi 10,10,1 +2: bf 30,4f + + sth 4,0(10) + addi 10,10,2 +4: bf 29,L(big_aligned) + + stw 4,0(10) + addi 10,10,4 + + .align 4 +L(big_aligned): + cmplwi cr5,5,255 + li 0,32 + cmplwi cr1,5,160 + dcbtst 0,10 + cmplwi cr6,4,0 + srwi 9,5,3 /* Number of full doublewords remaining. */ + crand 27,26,21 + mtocrf 0x01,9 + bt 27,L(huge) + + /* From this point on, we'll copy 32+ bytes and the value + isn't 0 (so we can't use dcbz). */ + + srwi 8,5,5 + clrlwi 11,5,29 + cmplwi cr6,11,0 + cmplwi cr1,9,4 + mtctr 8 + + /* Copy 1~3 doublewords so the main loop starts + at a multiple of 32 bytes. */ + + bf 30,1f + + stw 4,0(10) + stw 4,4(10) + stw 4,8(10) + stw 4,12(10) + addi 10,10,16 + bf 31,L(big_loop) + + stw 4,0(10) + stw 4,4(10) + addi 10,10,8 + mr 12,10 + blt cr1,L(tail_bytes) + + b L(big_loop) + + .align 4 +1: /* Copy 1 doubleword. */ + bf 31,L(big_loop) + + stw 4,0(10) + stw 4,4(10) + addi 10,10,8 + + /* First use a 32-bytes loop with stw's to try and avoid the LHS due + to the lfd we will do next. Also, ping-pong through r10 and r12 + to avoid AGEN delays. */ + .align 4 +L(big_loop): + addi 12,10,32 + stw 4,0(10) + stw 4,4(10) + stw 4,8(10) + stw 4,12(10) + stw 4,16(10) + stw 4,20(10) + stw 4,24(10) + stw 4,28(10) + bdz L(tail_bytes) + + addi 10,10,64 + stw 4,0(12) + stw 4,4(12) + stw 4,8(12) + stw 4,12(12) + stw 4,16(12) + stw 4,20(12) + stw 4,24(12) + stw 4,28(12) + bdnz L(big_loop_fast_setup) + + mr 12,10 + b L(tail_bytes) + + /* Now that we're probably past the LHS window, use the VSX to + speed up the loop. */ +L(big_loop_fast_setup): + li 11,24 + li 6,16 + lxvdsx 4,1,11 + + .align 4 +L(big_loop_fast): + addi 12,10,32 + stxvd2x 4,0,10 + stxvd2x 4,10,6 + bdz L(tail_bytes) + + addi 10,10,64 + stxvd2x 4,0,12 + stxvd2x 4,12,6 + bdnz L(big_loop_fast) + + mr 12,10 + + .align 4 +L(tail_bytes): + + /* Check for tail bytes. */ + mr 1,7 /* Restore r1. */ + beqlr cr6 + + clrlwi 0,5,29 + mtocrf 0x01,0 + + /* At this point we have a tail of 0-7 bytes and we know that the + destination is doubleword-aligned. */ +4: /* Copy 4 bytes. */ + bf 29,2f + + stw 4,0(12) + addi 12,12,4 +2: /* Copy 2 bytes. */ + bf 30,1f + + sth 4,0(12) + addi 12,12,2 +1: /* Copy 1 byte. */ + bflr 31 + + stb 4,0(12) + blr + + + /* Special case when value is 0 and we have a long length to deal + with. Use dcbz to zero out 128-bytes at a time. Before using + dcbz though, we need to get the destination 128-bytes aligned. */ + .align 4 +L(huge): + lfd 4,24(1) + andi. 11,10,127 + neg 0,10 + beq L(huge_aligned) + + clrlwi 0,0,25 + subf 5,0,5 + srwi 0,0,3 + mtocrf 0x01,0 + + /* Get DST aligned to 128 bytes. */ +8: bf 28,4f + + stfd 4,0(10) + stfd 4,8(10) + stfd 4,16(10) + stfd 4,24(10) + stfd 4,32(10) + stfd 4,40(10) + stfd 4,48(10) + stfd 4,56(10) + addi 10,10,64 + .align 4 +4: bf 29,2f + + stfd 4,0(10) + stfd 4,8(10) + stfd 4,16(10) + stfd 4,24(10) + addi 10,10,32 + .align 4 +2: bf 30,1f + + stfd 4,0(10) + stfd 4,8(10) + addi 10,10,16 + .align 4 +1: bf 31,L(huge_aligned) + + stfd 4,0(10) + addi 10,10,8 + +L(huge_aligned): + srwi 8,5,7 + clrlwi 11,5,25 + cmplwi cr6,11,0 + mtctr 8 + + /* Copies 128-bytes at a time. */ + .align 4 +L(huge_loop): + dcbz 0,10 + addi 10,10,128 + bdnz L(huge_loop) + + /* We have a tail of 0~127 bytes to handle. */ + mr 1,7 /* Restore r1. */ + beqlr cr6 + + subf 9,3,10 + subf 5,9,12 + srwi 8,5,3 + cmplwi cr6,8,0 + mtocrf 0x01,8 + + /* We have a tail o 1~127 bytes. Copy up to 15 doublewords for + speed. We'll handle the resulting tail bytes later. */ + beq cr6,L(tail) + +8: bf 28,4f + + stfd 4,0(10) + stfd 4,8(10) + stfd 4,16(10) + stfd 4,24(10) + stfd 4,32(10) + stfd 4,40(10) + stfd 4,48(10) + stfd 4,56(10) + addi 10,10,64 + .align 4 +4: bf 29,2f + + stfd 4,0(10) + stfd 4,8(10) + stfd 4,16(10) + stfd 4,24(10) + addi 10,10,32 + .align 4 +2: bf 30,1f + + stfd 4,0(10) + stfd 4,8(10) + addi 10,10,16 + .align 4 +1: bf 31,L(tail) + + stfd 4,0(10) + addi 10,10,8 + + /* Handle the rest of the tail bytes here. */ +L(tail): + mtocrf 0x01,5 + + .align 4 +4: bf 29,2f + + stw 4,0(10) + addi 10,10,4 + .align 4 +2: bf 30,1f + + sth 4,0(10) + addi 10,10,2 + .align 4 +1: bflr 31 + + stb 4,0(10) + blr + + + /* Expanded tree to copy tail bytes without increments. */ + .align 4 +L(copy_tail): + bf 29,L(FXX) + + stw 4,0(10) + bf 30,L(TFX) + + sth 4,4(10) + bflr 31 + + stb 4,6(10) + blr + + .align 4 +L(FXX): bf 30,L(FFX) + + sth 4,0(10) + bflr 31 + + stb 4,2(10) + blr + + .align 4 +L(TFX): bflr 31 + + stb 4,4(10) + blr + + .align 4 +L(FFX): bflr 31 + + stb 4,0(10) + blr + + /* Handle copies of 9~31 bytes. */ + .align 4 +L(medium): + /* At least 9 bytes to go. */ + andi. 11,10,3 + clrlwi 0,0,30 + beq L(medium_aligned) + + /* Force 4-bytes alignment for DST. */ + mtocrf 0x01,0 + subf 5,0,5 +1: /* Copy 1 byte. */ + bf 31,2f + + stb 4,0(10) + addi 10,10,1 +2: /* Copy 2 bytes. */ + bf 30,L(medium_aligned) + + sth 4,0(10) + addi 10,10,2 + + .align 4 +L(medium_aligned): + /* At least 6 bytes to go, and DST is word-aligned. */ + cmplwi cr1,5,16 + mtocrf 0x01,5 + blt cr1,8f + + /* Copy 16 bytes. */ + stw 4,0(10) + stw 4,4(10) + stw 4,8(10) + stw 4,12(10) + addi 10,10,16 +8: /* Copy 8 bytes. */ + bf 28,4f + + stw 4,0(10) + stw 4,4(10) + addi 10,10,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + stw 4,0(10) + addi 10,10,4 +2: /* Copy 2-3 bytes. */ + bf 30,1f + + sth 4,0(10) + addi 10,10,2 +1: /* Copy 1 byte. */ + bflr 31 + + stb 4,0(10) + blr + + /* Handles copies of 0~8 bytes. */ + .align 4 +L(small): + mtocrf 0x01,5 + bne cr6,L(copy_tail) + + stw 4,0(10) + stw 4,4(10) + blr + +END (memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/multiarch/Implies new file mode 100644 index 0000000000..22c12fd393 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power6/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/rawmemchr.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/rawmemchr.S new file mode 100644 index 0000000000..22edcfb209 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/rawmemchr.S @@ -0,0 +1,110 @@ +/* Optimized rawmemchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] rawmemchr (void *s [r3], int c [r4]) */ + .machine power7 +ENTRY (__rawmemchr) + CALL_MCOUNT + dcbt 0,r3 + clrrwi r8,r3,2 /* Align the address to word boundary. */ + + /* Replicate byte to word. */ + insrwi r4,r4,8,16 + insrwi r4,r4,16,0 + + /* Now r4 has a word of c bytes. */ + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + lwz r12,0(r8) /* Load word from memory. */ + cmpb r5,r12,r4 /* Compare each byte against c byte. */ +#ifdef __LITTLE_ENDIAN__ + srw r5,r5,r6 + slw r5,r5,r6 +#else + slw r5,r5,r6 /* Move left to discard ignored bits. */ + srw r5,r5,r6 /* Bring the bits back as zeros. */ +#endif + cmpwi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) + cmpb r5,r12,r4 + cmpwi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + lwz r12,4(r8) + lwzu r11,8(r8) + cmpb r5,r12,r4 + cmpb r6,r11,r4 + or r7,r5,r6 + cmpwi cr7,r7,0 + beq cr7,L(loop) + + /* OK, one (or both) of the words contains a 'c' byte. Check + the first word and decrement the address in case the first + word really contains a c byte. */ + + cmpwi cr6,r5,0 + addi r8,r8,-4 + bne cr6,L(done) + + /* The 'c' byte must be in the second word. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + mr r5,r6 + addi r8,r8,4 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the 'c' byte in the original + word from the string. Use that fact to find out what is + the position of the byte inside the string. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntw r0,r0 +#else + cntlzw r0,r5 /* Count leading zeros before the match. */ +#endif + srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching char. */ + blr +END (__rawmemchr) +weak_alias (__rawmemchr,rawmemchr) +libc_hidden_builtin_def (__rawmemchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strcasecmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strcasecmp.S new file mode 100644 index 0000000000..964875a13b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strcasecmp.S @@ -0,0 +1,129 @@ +/* Optimized strcasecmp implementation for PowerPC32. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <locale-defines.h> + +/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) + + or if defined USE_IN_EXTENDED_LOCALE_MODEL: + + int [r3] strcasecmp_l (const char *s1 [r3], const char *s2 [r4], + __locale_t loc [r5]) */ + +#ifndef STRCMP +# define __STRCMP __strcasecmp +# define STRCMP strcasecmp +#endif + +ENTRY (__STRCMP) + +#define rRTN r3 /* Return value */ +#define rSTR1 r5 /* 1st string */ +#define rSTR2 r4 /* 2nd string */ +#define rLOCARG r5 /* 3rd argument: locale_t */ +#define rCHAR1 r6 /* Byte read from 1st string */ +#define rCHAR2 r7 /* Byte read from 2nd string */ +#define rADDR1 r8 /* Address of tolower(rCHAR1) */ +#define rADDR2 r12 /* Address of tolower(rCHAR2) */ +#define rLWR1 r8 /* Byte tolower(rCHAR1) */ +#define rLWR2 r12 /* Byte tolower(rCHAR2) */ +#define rTMP r0 +#define rGOT r9 /* Address of the Global Offset Table */ +#define rLOC r11 /* Default locale address */ + + cmpw cr7, r3, r4 +#ifndef USE_IN_EXTENDED_LOCALE_MODEL +# ifdef SHARED + mflr rTMP + bcl 20,31,.L1 +.L1: mflr rGOT + addis rGOT, rGOT, _GLOBAL_OFFSET_TABLE_-.L1@ha + addi rGOT, rGOT, _GLOBAL_OFFSET_TABLE_-.L1@l + lwz rLOC, __libc_tsd_LOCALE@got@tprel(rGOT) + add rLOC, rLOC, __libc_tsd_LOCALE@tls + lwz rLOC, 0(rLOC) + mtlr rTMP +# else + lis rTMP,_GLOBAL_OFFSET_TABLE_@ha + la rLOC,_GLOBAL_OFFSET_TABLE_@l(rTMP) + lwz rLOC, __libc_tsd_LOCALE@got@tprel(rGOT) + add rLOC, rLOC, __libc_tsd_LOCALE@tls + lwz rLOC, 0(rLOC) +# endif /* SHARED */ +#else + mr rLOC, rLOCARG +#endif + mr rSTR1, rRTN + lwz rLOC, LOCALE_CTYPE_TOLOWER(rLOC) + li rRTN, 0 + beqlr cr7 + + /* Unrolling loop for POWER: loads are done with 'lbz' plus + offset and string descriptors are only updated in the end + of loop unrolling. */ + +L(loop): + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ + sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ + sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ + lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ + lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ + cmpwi cr7, rCHAR1, 0 /* *s1 == '\0' ? */ + subf. r3, rLWR2, rLWR1 + bnelr + beqlr cr7 + lbz rCHAR1, 1(rSTR1) + lbz rCHAR2, 1(rSTR2) + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpwi cr7, rCHAR1, 0 + subf. r3, rLWR2, rLWR1 + bnelr + beqlr cr7 + lbz rCHAR1, 2(rSTR1) + lbz rCHAR2, 2(rSTR2) + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpwi cr7, rCHAR1, 0 + subf. r3, rLWR2, rLWR1 + bnelr + beqlr cr7 + lbz rCHAR1, 3(rSTR1) + lbz rCHAR2, 3(rSTR2) + /* Increment both string descriptors */ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpwi cr7, rCHAR1, 0 + subf. r3, rLWR2, rLWR1 + bnelr + bne cr7,L(loop) + blr +END (__STRCMP) + +weak_alias (__STRCMP, STRCMP) +libc_hidden_builtin_def (__STRCMP) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S new file mode 100644 index 0000000000..c13c4ebcb8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S @@ -0,0 +1,5 @@ +#define USE_IN_EXTENDED_LOCALE_MODEL +#define STRCMP strcasecmp_l +#define __STRCMP __strcasecmp_l + +#include "strcasecmp.S" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strchr.S new file mode 100644 index 0000000000..75ca6acb98 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strchr.S @@ -0,0 +1,225 @@ +/* Optimized strchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strchr (char *s [r3], int c [r4]) */ + .machine power7 +ENTRY (strchr) + CALL_MCOUNT + dcbt 0,r3 + clrrwi r8,r3,2 /* Align the address to word boundary. */ + cmpwi cr7,r4,0 + lwz r12,0(r8) /* Load word from memory. */ + li r0,0 /* Word with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to word. */ + insrwi r4,r4,8,16 + insrwi r4,r4,16,0 + + /* Now r4 has a word of c bytes and r0 has + a word of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the words left and right to discard the bits that are + not part of the string and to bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srw r10,r10,r6 + srw r11,r11,r6 + slw r10,r10,r6 + slw r11,r11,r6 +#else + slw r10,r10,r6 + slw r11,r11,r6 + srw r10,r10,r6 + srw r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpwi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + lwz r12,4(r8) + lwzu r9,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r9,r4 + cmpb r7,r9,r0 + or r12,r10,r11 + or r9,r6,r7 + or r5,r12,r9 + cmpwi cr7,r5,0 + beq cr7,L(loop) + + /* OK, one (or both) of the words contains a c/null byte. Check + the first word and decrement the address in case the first + word really contains a c/null byte. */ + + cmpwi cr6,r12,0 + addi r8,r8,-4 + bne cr6,L(done) + + /* The c/null byte must be in the second word. Adjust the address + again and move the result of cmpb to r10/r11 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,4 + + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + word from the string. Use that to calculate the pointer. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r3,r10,-1 + andc r3,r3,r10 + popcntw r0,r3 + addi r4,r11,-1 + andc r4,r4,r11 + cmplw cr7,r3,r4 + bgt cr7,L(no_match) +#else + cntlzw r0,r10 /* Count leading zeros before c matches. */ + cmplw cr7,r11,r10 + bgt cr7,L(no_match) +#endif + srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + blr + + .align 4 +L(no_match): + li r3,0 + blr + +/* We are here because strchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a word of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the words left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srw r5,r5,r6 + slw r5,r5,r6 +#else + slw r5,r5,r6 + srw r5,r5,r6 +#endif + cmpwi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop_null) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) + cmpb r5,r12,r0 + cmpwi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + lwz r12,4(r8) + lwzu r11,8(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpwi cr7,r6,0 + beq cr7,L(loop_null) + + /* OK, one (or both) of the words contains a null byte. Check + the first word and decrement the address in case the first + word really contains a null byte. */ + + cmpwi cr6,r5,0 + addi r8,r8,-4 + bne cr6,L(done_null) + + /* The null byte must be in the second word. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,4 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + word from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntw r0,r0 +#else + cntlzw r0,r5 /* Count leading zeros before the match. */ +#endif + srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +END (strchr) +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strchrnul.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strchrnul.S new file mode 100644 index 0000000000..426137e11d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strchrnul.S @@ -0,0 +1,127 @@ +/* Optimized strchrnul implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strchrnul (char *s [r3], int c [r4]) */ + .machine power7 +ENTRY (__strchrnul) + CALL_MCOUNT + dcbt 0,r3 + clrrwi r8,r3,2 /* Align the address to word boundary. */ + + /* Replicate byte to word. */ + insrwi r4,r4,8,16 + insrwi r4,r4,16,0 + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + lwz r12,0(r8) /* Load word from memory. */ + li r0,0 /* Word with null chars to use + with cmpb. */ + + /* Now r4 has a word of c bytes and r0 has + a word of null bytes. */ + + cmpb r10,r12,r0 /* Compare each byte against c byte. */ + cmpb r9,r12,r4 /* Compare each byte against null byte. */ + + /* Move the words left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srw r10,r10,r6 + srw r9,r9,r6 + slw r10,r10,r6 + slw r9,r9,r6 +#else + slw r10,r10,r6 + slw r9,r9,r6 + srw r10,r10,r6 + srw r9,r9,r6 +#endif + or r5,r9,r10 /* OR the results to speed things up. */ + cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) + cmpb r10,r12,r0 + cmpb r9,r12,r4 + or r5,r9,r10 + cmpwi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + lwz r12,4(r8) + lwzu r11,8(r8) + cmpb r10,r12,r0 + cmpb r9,r12,r4 + cmpb r6,r11,r0 + cmpb r7,r11,r4 + or r5,r9,r10 + or r10,r6,r7 + or r11,r5,r10 + cmpwi cr7,r11,0 + beq cr7,L(loop) + + /* OK, one (or both) of the words contains a c/null byte. Check + the first word and decrement the address in case the first + word really contains a c/null byte. */ + + cmpwi cr6,r5,0 + addi r8,r8,-4 + bne cr6,L(done) + + /* The c/null byte must be in the second word. Adjust the address + again and move the result of cmpb to r5 so we can calculate the + pointer. */ + mr r5,r10 + addi r8,r8,4 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the c/null byte in the original + word from the string. Use that to calculate the pointer. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntw r0,r0 +#else + cntlzw r0,r5 /* Count leading zeros before the match. */ +#endif + srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of matching c/null byte. */ + blr +END (__strchrnul) +weak_alias (__strchrnul,strchrnul) +libc_hidden_builtin_def (__strchrnul) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strlen.S new file mode 100644 index 0000000000..3699791fa6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strlen.S @@ -0,0 +1,102 @@ +/* Optimized strlen implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strlen (char *s [r3]) */ + .machine power7 +ENTRY (strlen) + CALL_MCOUNT + dcbt 0,r3 + clrrwi r4,r3,2 /* Align the address to word boundary. */ + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + li r0,0 /* Word with null chars to use with cmpb. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + lwz r12,0(r4) /* Load word from memory. */ +#ifdef __LITTLE_ENDIAN__ + slw r5,r5,r6 +#else + srw r5,r5,r6 /* MASK = MASK >> padding. */ +#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in WORD1. */ + cmpwi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r4 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r4) + cmpb r10,r12,r0 + cmpwi cr7,r10,0 + bne cr7,L(done) + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + + lwz r12, 4(r4) + lwzu r11, 8(r4) + cmpb r10,r12,r0 + cmpb r9,r11,r0 + or r8,r9,r10 /* Merge everything in one word. */ + cmpwi cr7,r8,0 + beq cr7,L(loop) + + /* OK, one (or both) of the words contains a null byte. Check + the first word and decrement the address in case the first + word really contains a null byte. */ + + cmpwi cr6,r10,0 + addi r4,r4,-4 + bne cr6,L(done) + + /* The null byte must be in the second word. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r9 + addi r4,r4,4 + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + word from the string. Use that to calculate the length. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r9, r10, -1 /* Form a mask from trailing zeros. */ + andc r9, r9, r10 + popcntw r0, r9 /* Count the bits in the mask. */ +#else + cntlzw r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r4 + srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r5,r0 /* Compute final length. */ + blr +END (strlen) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strncmp.S new file mode 100644 index 0000000000..d4598e1930 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strncmp.S @@ -0,0 +1,199 @@ +/* Optimized strcmp implementation for POWER7/PowerPC32. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +EALIGN (strncmp,5,0) + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r10 +#define rWORD4 r11 +#define rFEFE r8 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + nop + or rTMP,rSTR2,rSTR1 + lis r7F7F,0x7f7f + dcbt 0,rSTR2 + nop + clrlwi. rTMP,rTMP,30 + cmplwi cr1,rN,0 + lis rFEFE,-0x101 + bne L(unaligned) +/* We are word aligned so set up for two loops. first a word + loop, then fall into the byte loop if any residual. */ + srwi. rTMP,rN,2 + clrlwi rN,rN,30 + addi rFEFE,rFEFE,-0x101 + addi r7F7F,r7F7F,0x7f7f + cmplwi cr1,rN,0 + beq L(unaligned) + + mtctr rTMP + lwz rWORD1,0(rSTR1) + lwz rWORD2,0(rSTR2) + b L(g1) + +L(g0): + lwzu rWORD1,4(rSTR1) + bne cr1,L(different) + lwzu rWORD2,4(rSTR2) +L(g1): add rTMP,rFEFE,rWORD1 + nor rNEG,r7F7F,rWORD1 + bdz L(tail) + and. rTMP,rTMP,rNEG + cmpw cr1,rWORD1,rWORD2 + beq L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ +#ifdef __LITTLE_ENDIAN__ +L(endstring): + slwi rTMP, rTMP, 1 + addi rTMP2, rTMP, -1 + andc rTMP2, rTMP2, rTMP + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rldimi rTMP2, rWORD2, 24, 32 + rldimi rTMP, rWORD1, 24, 32 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr + ori rRTN, rTMP2, 1 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rldimi rTMP2, rWORD2, 24, 32 + rldimi rTMP, rWORD1, 24, 32 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr + ori rRTN, rTMP2, 1 + blr + +#else +L(endstring): + and rTMP,r7F7F,rWORD1 + beq cr1,L(equal) + add rTMP,rTMP,r7F7F + xor. rBITDIF,rWORD1,rWORD2 + andc rNEG,rNEG,rTMP + blt L(highbit) + cntlzw rBITDIF,rBITDIF + cntlzw rNEG,rNEG + addi rNEG,rNEG,7 + cmpw cr1,rNEG,rBITDIF + sub rRTN,rWORD1,rWORD2 + bgelr cr1 +L(equal): + li rRTN,0 + blr + +L(different): + lwz rWORD1,-4(rSTR1) + xor. rBITDIF,rWORD1,rWORD2 + sub rRTN,rWORD1,rWORD2 + bgelr +L(highbit): + ori rRTN, rWORD2, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. rTMP,rTMP,rNEG + cmpw cr1,rWORD1,rWORD2 + bne L(endstring) + addi rSTR1,rSTR1,4 + bne cr1,L(different) + addi rSTR2,rSTR2,4 + cmplwi cr1,rN,0 +L(unaligned): + mtctr rN + ble cr1,L(ux) +L(uz): + lbz rWORD1,0(rSTR1) + lbz rWORD2,0(rSTR2) + .align 4 +L(u1): + cmpwi cr1,rWORD1,0 + bdz L(u4) + cmpw rWORD1,rWORD2 + beq cr1,L(u4) + bne L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + cmpwi cr1,rWORD3,0 + bdz L(u3) + cmpw rWORD3,rWORD4 + beq cr1,L(u3) + bne L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + cmpwi cr1,rWORD1,0 + bdz L(u4) + cmpw rWORD1,rWORD2 + beq cr1,L(u4) + bne L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + cmpwi cr1,rWORD3,0 + bdz L(u3) + cmpw rWORD3,rWORD4 + beq cr1,L(u3) + bne L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + b L(u1) + +L(u3): sub rRTN,rWORD3,rWORD4 + blr +L(u4): sub rRTN,rWORD1,rWORD2 + blr +L(ux): + li rRTN,0 + blr +END (strncmp) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strnlen.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strnlen.S new file mode 100644 index 0000000000..6019d5be5b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strnlen.S @@ -0,0 +1,176 @@ +/* Optimized strnlen implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strnlen (char *s [r3], int size [r4]) */ + .machine power7 +ENTRY (__strnlen) + CALL_MCOUNT + dcbt 0,r3 + clrrwi r8,r3,2 /* Align the address to word boundary. */ + add r7,r3,r4 /* Calculate the last acceptable address. */ + cmplwi r4,16 + li r0,0 /* Word with null chars. */ + addi r7,r7,-1 + ble L(small_range) + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + lwz r12,0(r8) /* Load word from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ +#ifdef __LITTLE_ENDIAN__ + srw r10,r10,r6 + slw r10,r10,r6 +#else + slw r10,r10,r6 + srw r10,r10,r6 +#endif + cmplwi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + clrrwi r7,r7,2 /* Address of last word. */ + mtcrf 0x01,r8 + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop_setup) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) + cmpb r10,r12,r0 + cmplwi cr7,r10,0 + bne cr7,L(done) + +L(loop_setup): + /* The last word we want to read in the loop below is the one + containing the last byte of the string, ie. the word at + (s + size - 1) & ~3, or r7. The first word read is at + r8 + 4, we read 2 * cnt words, so the last word read will + be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives + cnt = (r7 - r8) / 8 */ + sub r5,r7,r8 + srwi r6,r5,3 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. */ + + /* Main loop to look for the null byte in the string. Since + it's a small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + + lwz r12,4(r8) + lwzu r11,8(r8) + cmpb r10,r12,r0 + cmpb r9,r11,r0 + or r5,r9,r10 /* Merge everything in one word. */ + cmplwi cr7,r5,0 + bne cr7,L(found) + bdnz L(loop) + + /* We may have one more word to read. */ + cmplw cr6,r8,r7 + beq cr6,L(end_max) + + lwzu r12,4(r8) + cmpb r10,r12,r0 + cmplwi cr6,r10,0 + bne cr6,L(done) + +L(end_max): + mr r3,r4 + blr + + /* OK, one (or both) of the words contains a null byte. Check + the first word and decrement the address in case the first + word really contains a null byte. */ + .align 4 +L(found): + cmplwi cr6,r10,0 + addi r8,r8,-4 + bne cr6,L(done) + + /* The null byte must be in the second word. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r9 + addi r8,r8,4 + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + word from the string. Use that to calculate the length. + We need to make sure the null char is *before* the end of the + range. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r10,-1 + andc r0,r0,r10 + popcntw r0,r0 +#else + cntlzw r0,r10 /* Count leading zeros before the match. */ +#endif + sub r3,r8,r3 + srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r3,r0 /* Length until the match. */ + cmplw r3,r4 + blelr + mr r3,r4 + blr + +/* Deals with size <= 16. */ + .align 4 +L(small_range): + cmplwi r4,0 + beq L(end_max) + + clrrwi r7,r7,2 /* Address of last word. */ + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + lwz r12,0(r8) /* Load word from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in WORD1. */ +#ifdef __LITTLE_ENDIAN__ + srw r10,r10,r6 + slw r10,r10,r6 +#else + slw r10,r10,r6 + srw r10,r10,r6 +#endif + cmplwi cr7,r10,0 + bne cr7,L(done) + + cmplw r8,r7 + beq L(end_max) + + .p2align 5 +L(loop_small): + lwzu r12,4(r8) + cmpb r10,r12,r0 + cmplwi cr6,r10,0 + bne cr6,L(done) + cmplw r8,r7 + bne L(loop_small) + mr r3,r4 + blr + +END (__strnlen) +libc_hidden_def (__strnlen) +weak_alias (__strnlen, strnlen) +libc_hidden_builtin_def (strnlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power8/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power8/Implies new file mode 100644 index 0000000000..083f3e950a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power8/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power7/fpu +powerpc/powerpc32/power7 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power8/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power8/fpu/multiarch/Implies new file mode 100644 index 0000000000..43a3b83e2a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power8/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power7/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power8/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power8/multiarch/Implies new file mode 100644 index 0000000000..f18504408f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power8/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power7/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power9/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power9/Implies new file mode 100644 index 0000000000..066dea2798 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power9/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power8/fpu +powerpc/powerpc32/power8 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power9/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power9/fpu/multiarch/Implies new file mode 100644 index 0000000000..4393b56872 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power9/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power8/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power9/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power9/multiarch/Implies new file mode 100644 index 0000000000..1a46ef0035 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power9/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power8/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/ppc-mcount.S b/REORG.TODO/sysdeps/powerpc/powerpc32/ppc-mcount.S new file mode 100644 index 0000000000..8a6b205c37 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/ppc-mcount.S @@ -0,0 +1,104 @@ +/* PowerPC-specific implementation of profiling support. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This would be bad. */ +#ifdef PROF +#undef PROF +#endif + +#include <sysdep.h> + +/* We do profiling as described in the SYSV ELF ABI, except that glibc + _mcount manages its own counters. The caller has put the address the + caller will return to in the usual place on the stack, 4(r1). _mcount + is responsible for ensuring that when it returns no argument-passing + registers are disturbed, and that the LR is set back to (what the + caller sees as) 4(r1). + + This is intended so that the following code can be inserted at the + front of any routine without changing the routine: + + .data + mflr r0 + stw r0,4(r1) + bl _mcount +*/ + +ENTRY(_mcount) +#if defined PIC && !defined SHARED +# define CALLER_LR_OFFSET 68 + stwu r1,-64(r1) + cfi_adjust_cfa_offset (64) + stw r30, 48(r1) + cfi_rel_offset (r30, 48) +#else +# define CALLER_LR_OFFSET 52 + stwu r1,-48(r1) + cfi_adjust_cfa_offset (48) +#endif +/* We need to save the parameter-passing registers. */ + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r4 +#if defined PIC && !defined SHARED + bcl 20,31,0f +0: + mflr r30 + addis r30, r30, _GLOBAL_OFFSET_TABLE_-0b@ha + addi r30, r30, _GLOBAL_OFFSET_TABLE_-0b@l +#endif + lwz r3, CALLER_LR_OFFSET(r1) + mfcr r5 + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + stw r4, 44(r1) + cfi_rel_offset (lr, 44) + stw r5, 8(r1) +#ifndef SHARED + bl JUMPTARGET(__mcount_internal) +#else + bl __mcount_internal@local +#endif + /* Restore the registers... */ + lwz r6, 8(r1) + lwz r0, 44(r1) + lwz r3, 12(r1) + mtctr r0 + lwz r4, 16(r1) + mtcrf 0xff,r6 + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r0, CALLER_LR_OFFSET(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + mtlr r0 + lwz r9, 36(r1) + lwz r10,40(r1) + /* ...unwind the stack frame, and return to your usual programming. */ +#if defined PIC && !defined SHARED + lwz r30, 48(r1) + addi r1,r1,64 +#else + addi r1,r1,48 +#endif + bctr +END(_mcount) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/register-dump.h b/REORG.TODO/sysdeps/powerpc/powerpc32/register-dump.h new file mode 100644 index 0000000000..6e533a75a3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/register-dump.h @@ -0,0 +1,120 @@ +/* Dump registers. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sys/uio.h> +#include <_itoa.h> + +/* This prints out the information in the following form: */ +static const char dumpform[] = "\ +Register dump:\n\ +fp0-3: 0000030%0000031% 0000032%0000033% 0000034%0000035% 0000036%0000037%\n\ +fp4-7: 0000038%0000039% 000003a%000003b% 000003c%000003d% 000003e%000003f%\n\ +fp8-11: 0000040%0000041% 0000042%0000043% 0000044%0000045% 0000046%0000047%\n\ +fp12-15: 0000048%0000049% 000004a%000004b% 000004c%000004d% 000004e%000004f%\n\ +fp16-19: 0000050%0000051% 0000052%0000053% 0000054%0000055% 0000056%0000057%\n\ +fp20-23: 0000058%0000059% 000005a%000005b% 000005c%000005d% 000005e%000005f%\n\ +fp24-27: 0000060%0000061% 0000062%0000063% 0000064%0000065% 0000066%0000067%\n\ +fp28-31: 0000068%0000069% 000006a%000006b% 000006c%000006d% 000006e%000006f%\n\ +r0 =0000000% sp =0000001% r2 =0000002% r3 =0000003% trap=0000028%\n\ +r4 =0000004% r5 =0000005% r6 =0000006% r7 =0000007% sr0=0000020% sr1=0000021%\n\ +r8 =0000008% r9 =0000009% r10=000000a% r11=000000b% dar=0000029% dsi=000002a%\n\ +r12=000000c% r13=000000d% r14=000000e% r15=000000f% r3*=0000022%\n\ +r16=0000010% r17=0000011% r18=0000012% r19=0000013%\n\ +r20=0000014% r21=0000015% r22=0000016% r23=0000017% lr=0000024% xer=0000025%\n\ +r24=0000018% r25=0000019% r26=000001a% r27=000001b% mq=0000027% ctr=0000023%\n\ +r28=000001c% r29=000001d% r30=000001e% r31=000001f% fscr=0000071% ccr=0000026%\n\ +"; + +/* Most of the fields are self-explanatory. 'sr0' is the next + instruction to execute, from SRR0, which may have some relationship + with the instruction that caused the exception. 'r3*' is the value + that will be returned in register 3 when the current system call + returns. 'sr1' is SRR1, bits 16-31 of which are copied from the MSR: + + 16 - External interrupt enable + 17 - Privilege level (1=user, 0=supervisor) + 18 - FP available + 19 - Machine check enable (if clear, processor locks up on machine check) + 20 - FP exception mode bit 0 (FP exceptions recoverable) + 21 - Single-step trace enable + 22 - Branch trace enable + 23 - FP exception mode bit 1 + 25 - exception prefix (if set, exceptions are taken from 0xFFFnnnnn, + otherwise from 0x000nnnnn). + 26 - Instruction address translation enabled. + 27 - Data address translation enabled. + 30 - Exception is recoverable (otherwise, don't try to return). + 31 - Little-endian mode enable. + + 'Trap' is the address of the exception: + + 00200 - Machine check exception (memory parity error, for instance) + 00300 - Data access exception (memory not mapped, see dsisr for why) + 00400 - Instruction access exception (memory not mapped) + 00500 - External interrupt + 00600 - Alignment exception (see dsisr for more information) + 00700 - Program exception (illegal/trap instruction, FP exception) + 00800 - FP unavailable (should not be seen by user code) + 00900 - Decrementer exception (for instance, SIGALRM) + 00A00 - I/O controller interface exception + 00C00 - System call exception (for instance, kill(3)). + 00E00 - FP assist exception (optional FP instructions, etc.) + + 'dar' is the memory location, for traps 00300, 00400, 00600, 00A00. + 'dsisr' has the following bits under trap 00300: + 0 - direct-store error exception + 1 - no page table entry for page + 4 - memory access not permitted + 5 - trying to access I/O controller space or using lwarx/stwcx on + non-write-cached memory + 6 - access was store + 9 - data access breakpoint hit + 10 - segment table search failed to find translation (64-bit ppcs only) + 11 - I/O controller instruction not permitted + For trap 00400, the same bits are set in SRR1 instead. + For trap 00600, bits 12-31 of the DSISR set to allow emulation of + the instruction without actually having to read it from memory. +*/ + +#define xtoi(x) (x >= 'a' ? x + 10 - 'a' : x - '0') + +static void +register_dump (int fd, struct sigcontext *ctx) +{ + char buffer[sizeof(dumpform)]; + char *bufferpos; + unsigned regno; + unsigned *regs = (unsigned *)(ctx->regs); + + memcpy(buffer, dumpform, sizeof(dumpform)); + + /* Generate the output. */ + while ((bufferpos = memchr (buffer, '%', sizeof(dumpform)))) + { + regno = xtoi (bufferpos[-1]) | xtoi (bufferpos[-2]) << 4; + memset (bufferpos-2, '0', 3); + _itoa_word (regs[regno], bufferpos+1, 16, 0); + } + + /* Write the output. */ + write (fd, buffer, sizeof(buffer) - 1); +} + + +#define REGISTER_DUMP \ + register_dump (fd, ctx) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/rshift.S b/REORG.TODO/sysdeps/powerpc/powerpc32/rshift.S new file mode 100644 index 0000000000..58abdcad59 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/rshift.S @@ -0,0 +1,55 @@ +/* Shift a limb right, low level routine. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* INPUT PARAMETERS + res_ptr r3 + s1_ptr r4 + size r5 + cnt r6 */ + +ENTRY (__mpn_rshift) + mtctr r5 # copy size into CTR + addi r7,r3,-4 # move adjusted res_ptr to free return reg + subfic r8,r6,32 + lwz r11,0(r4) # load first s1 limb + slw r3,r11,r8 # compute function return value + bdz L(1) + +L(0): lwzu r10,4(r4) + srw r9,r11,r6 + slw r12,r10,r8 + or r9,r9,r12 + stwu r9,4(r7) + bdz L(2) + lwzu r11,4(r4) + srw r9,r10,r6 + slw r12,r11,r8 + or r9,r9,r12 + stwu r9,4(r7) + bdnz L(0) + +L(1): srw r0,r11,r6 + stw r0,4(r7) + blr + +L(2): srw r0,r10,r6 + stw r0,4(r7) + blr +END (__mpn_rshift) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/rtld-memset.c b/REORG.TODO/sysdeps/powerpc/powerpc32/rtld-memset.c new file mode 100644 index 0000000000..f3ed8ad1e7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/rtld-memset.c @@ -0,0 +1,4 @@ +/* PPCA2 has a different cache-line size than the usual 128 bytes. To avoid + using code that assumes cache-line size to be 128 bytes (with dcbz + instructions) we use the generic code instead. */ +#include <string/memset.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/setjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc32/setjmp-common.S new file mode 100644 index 0000000000..c74c492cec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/setjmp-common.S @@ -0,0 +1,78 @@ +/* setjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#ifdef __NO_VMX__ +# include <novmxsetjmp.h> +#else +# include <jmpbuf-offsets.h> +#endif + +#if defined __SPE__ || (defined __NO_FPRS__ && !defined _SOFT_FLOAT) +# define SAVE_GP(N) evstdd r##N,((JB_FPRS+((N)-14)*2)*4)(3) +#else +# define SAVE_GP(N) stw r##N,((JB_GPRS+(N)-14)*4)(3) +#endif + +ENTRY (__sigsetjmp_symbol) + +#ifdef PTR_MANGLE + mr r5,r1 + PTR_MANGLE(r5, r10) + stw r5,(JB_GPR1*4)(3) +#else + stw r1,(JB_GPR1*4)(3) +#endif + mflr r0 + /* setjmp probe expects longjmp first argument (4@3), second argument + (-4@4), and target address (4@0), respectively. */ + LIBC_PROBE (setjmp, 3, 4@3, -4@4, 4@0) + SAVE_GP (14) +#ifdef PTR_MANGLE + PTR_MANGLE2 (r0, r10) + li r10,0 +#endif + stw r0,(JB_LR*4)(3) + SAVE_GP (15) + mfcr r0 + SAVE_GP (16) + stw r0,(JB_CR*4)(3) + SAVE_GP (17) + SAVE_GP (18) + SAVE_GP (19) + SAVE_GP (20) + SAVE_GP (21) + SAVE_GP (22) + SAVE_GP (23) + SAVE_GP (24) + SAVE_GP (25) + SAVE_GP (26) + SAVE_GP (27) + SAVE_GP (28) + SAVE_GP (29) + SAVE_GP (30) + SAVE_GP (31) +#if IS_IN (rtld) + li r3,0 + blr +#else + b __sigjmp_save_symbol@local +#endif +END (__sigsetjmp_symbol) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/setjmp.S new file mode 100644 index 0000000000..2800466276 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/setjmp.S @@ -0,0 +1,46 @@ +/* non altivec (old) version of setjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <shlib-compat.h> +#include <libc-symbols.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +# define __sigsetjmp_symbol __sigsetjmp +# define __sigjmp_save_symbol __sigjmp_save +# include "setjmp-common.S" + +#else /* IS_IN (libc) */ +/* Build a versioned object for libc. */ +versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4) +# define __sigsetjmp_symbol __vmx__sigsetjmp +# define __sigjmp_save_symbol __vmx__sigjmp_save +# include "setjmp-common.S" +libc_hidden_ver (__vmx__sigsetjmp, __sigsetjmp) + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +# define __NO_VMX__ +# undef __sigsetjmp_symbol +# undef __sigjmp_save_symbol +# undef JB_SIZE +compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0) +# define __sigsetjmp_symbol __novmx__sigsetjmp +# define __sigjmp_save_symbol __novmx__sigjmp_save +# include "setjmp-common.S" +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/stackguard-macros.h b/REORG.TODO/sysdeps/powerpc/powerpc32/stackguard-macros.h new file mode 100644 index 0000000000..b3d0af830f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/stackguard-macros.h @@ -0,0 +1,14 @@ +#include <stdint.h> + +#define STACK_CHK_GUARD \ + ({ uintptr_t x; asm ("lwz %0,-28680(2)" : "=r" (x)); x; }) + +#define POINTER_CHK_GUARD \ + ({ \ + uintptr_t x; \ + asm ("lwz %0,%1(2)" \ + : "=r" (x) \ + : "i" (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) \ + ); \ + x; \ + }) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/start.S b/REORG.TODO/sysdeps/powerpc/powerpc32/start.S new file mode 100644 index 0000000000..d510a56c0f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/start.S @@ -0,0 +1,95 @@ +/* Startup code for programs linked with GNU libc. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* We do not want .eh_frame info for crt1.o since crt1.o is linked + before crtbegin.o, the file defining __EH_FRAME_BEGIN__. */ +#undef cfi_startproc +#define cfi_startproc +#undef cfi_endproc +#define cfi_endproc + + /* These are the various addresses we require. */ +#ifdef PIC + .section ".data" +#else + .section ".rodata" +#endif + .align 2 +L(start_addresses): + .long _SDA_BASE_ + .long main + .long __libc_csu_init + .long __libc_csu_fini + ASM_SIZE_DIRECTIVE(L(start_addresses)) + + .section ".text" +ENTRY(_start) + /* Save the stack pointer, in case we're statically linked under Linux. */ + mr r9,r1 + /* Set up an initial stack frame, and clear the LR. */ + clrrwi r1,r1,4 +#ifdef PIC + SETUP_GOT_ACCESS(r13,got_label) + li r0,0 +#else + li r0,0 +#endif + stwu r1,-16(r1) + mtlr r0 + stw r0,0(r1) + /* Set r13 to point at the 'small data area', and put the address of + start_addresses in r8. Also load the GOT pointer so that new PLT + calls work, like the one to __libc_start_main. */ +#ifdef PIC + addis r30,r13,_GLOBAL_OFFSET_TABLE_-got_label@ha + addis r8,r13,L(start_addresses)-got_label@ha + addi r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l + lwzu r13, L(start_addresses)-got_label@l(r8) +#else + lis r8,L(start_addresses)@ha + lwzu r13,L(start_addresses)@l(r8) +#endif + /* and continue in libc-start, in glibc. */ + b JUMPTARGET(__libc_start_main) +END(_start) + +/* Define a symbol for the first piece of initialized data. */ + .section ".data" + .globl __data_start +__data_start: + .long 0 +weak_alias (__data_start, data_start) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/stpcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/stpcpy.S new file mode 100644 index 0000000000..6ef249f80d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/stpcpy.S @@ -0,0 +1,119 @@ +/* Optimized stpcpy implementation for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* char * [r3] stpcpy (char *dest [r3], const char *src [r4]) */ + +EALIGN (__stpcpy, 4, 0) + +#define rTMP r0 +#define rRTN r3 +#define rDEST r3 /* pointer to previous word in dest */ +#define rSRC r4 /* pointer to previous word in src */ +#define rWORD r6 /* current word from src */ +#define rFEFE r7 /* 0xfefefeff */ +#define r7F7F r8 /* 0x7f7f7f7f */ +#define rNEG r9 /* ~(word in src | 0x7f7f7f7f) */ +#define rALT r10 /* alternate word from src */ + + + or rTMP, rSRC, rDEST + clrlwi. rTMP, rTMP, 30 + addi rDEST, rDEST, -4 + bne L(unaligned) + + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + lwz rWORD, 0(rSRC) + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + b L(g2) + +L(g0): lwzu rALT, 4(rSRC) + stwu rWORD, 4(rDEST) + add rTMP, rFEFE, rALT + nor rNEG, r7F7F, rALT + and. rTMP, rTMP, rNEG + bne- L(g1) + lwzu rWORD, 4(rSRC) + stwu rALT, 4(rDEST) +L(g2): add rTMP, rFEFE, rWORD + nor rNEG, r7F7F, rWORD + and. rTMP, rTMP, rNEG + beq+ L(g0) + + mr rALT, rWORD +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(g1): +#ifdef __LITTLE_ENDIAN__ + rlwinm. rTMP, rALT, 0, 24, 31 + stbu rALT, 4(rDEST) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rDEST) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rDEST) + beqlr- + rlwinm rTMP, rALT, 8, 24, 31 + stbu rTMP, 1(rDEST) + blr +#else + rlwinm. rTMP, rALT, 8, 24, 31 + stbu rTMP, 4(rDEST) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rDEST) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rDEST) + beqlr- + stbu rALT, 1(rDEST) + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte copy. */ + .align 4 + nop +L(unaligned): + lbz rWORD, 0(rSRC) + addi rDEST, rDEST, 3 + cmpwi rWORD, 0 + beq- L(u2) + +L(u0): lbzu rALT, 1(rSRC) + stbu rWORD, 1(rDEST) + cmpwi rALT, 0 + beq- L(u1) + nop /* Let 601 load start of loop. */ + lbzu rWORD, 1(rSRC) + stbu rALT, 1(rDEST) + cmpwi rWORD, 0 + bne+ L(u0) +L(u2): stbu rWORD, 1(rDEST) + blr +L(u1): stbu rALT, 1(rDEST) + blr +END (__stpcpy) + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc32/strchr.S new file mode 100644 index 0000000000..868cbd46aa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/strchr.S @@ -0,0 +1,146 @@ +/* Optimized strchr implementation for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how this works. */ + +/* char * [r3] strchr (const char *s [r3] , int c [r4] ) */ + +ENTRY (strchr) + +#define rTMP1 r0 +#define rRTN r3 /* outgoing result */ +#define rSTR r8 /* current word pointer */ +#define rCHR r4 /* byte we're looking for, spread over the whole word */ +#define rWORD r5 /* the current word */ +#define rCLZB rCHR /* leading zero byte count */ +#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r7 /* constant 0x7f7f7f7f */ +#define rTMP2 r9 +#define rIGN r10 /* number of bits we should ignore in the first word */ +#define rMASK r11 /* mask with the bits to ignore set to 0 */ +#define rTMP3 r12 +#define rTMP4 rIGN +#define rTMP5 rMASK + + + rlwimi rCHR, rCHR, 8, 16, 23 + li rMASK, -1 + rlwimi rCHR, rCHR, 16, 0, 15 + rlwinm rIGN, rRTN, 3, 27, 28 + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + clrrwi rSTR, rRTN, 2 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f +/* Test the first (partial?) word. */ + lwz rWORD, 0(rSTR) +#ifdef __LITTLE_ENDIAN__ + slw rMASK, rMASK, rIGN +#else + srw rMASK, rMASK, rIGN +#endif + orc rWORD, rWORD, rMASK + add rTMP1, rFEFE, rWORD + nor rTMP2, r7F7F, rWORD + and. rTMP4, rTMP1, rTMP2 + xor rTMP3, rCHR, rWORD + orc rTMP3, rTMP3, rMASK + b L(loopentry) + +/* The loop. */ + +L(loop): + lwzu rWORD, 4(rSTR) + and. rTMP5, rTMP1, rTMP2 +/* Test for 0. */ + add rTMP1, rFEFE, rWORD /* x - 0x01010101. */ + nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */ + bne L(foundit) + and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */ +/* Start test for the bytes we're looking for. */ + xor rTMP3, rCHR, rWORD +L(loopentry): + add rTMP1, rFEFE, rTMP3 + nor rTMP2, r7F7F, rTMP3 + beq L(loop) + +/* There is a zero byte in the word, but may also be a matching byte (either + before or after the zero byte). In fact, we may be looking for a + zero byte, in which case we return a match. */ + and. rTMP5, rTMP1, rTMP2 + li rRTN, 0 + beqlr +/* At this point: + rTMP5 bytes are 0x80 for each match of c, 0 otherwise. + rTMP4 bytes are 0x80 for each match of 0, 0 otherwise. + But there may be false matches in the next most significant byte from + a true match due to carries. This means we need to recalculate the + matches using a longer method for big-endian. */ +#ifdef __LITTLE_ENDIAN__ + addi rTMP1, rTMP5, -1 + andc rTMP1, rTMP1, rTMP5 + cntlzw rCLZB, rTMP1 + addi rTMP2, rTMP4, -1 + andc rTMP2, rTMP2, rTMP4 + cmplw rTMP1, rTMP2 + bgtlr + subfic rCLZB, rCLZB, 32-7 +#else +/* I think we could reduce this by two instructions by keeping the "nor" + results from the loop for reuse here. See strlen.S tail. Similarly + one instruction could be pruned from L(foundit). */ + and rFEFE, r7F7F, rWORD + or rTMP5, r7F7F, rWORD + and rTMP1, r7F7F, rTMP3 + or rTMP4, r7F7F, rTMP3 + add rFEFE, rFEFE, r7F7F + add rTMP1, rTMP1, r7F7F + nor rWORD, rTMP5, rFEFE + nor rTMP2, rTMP4, rTMP1 + cntlzw rCLZB, rTMP2 + cmplw rWORD, rTMP2 + bgtlr +#endif + srwi rCLZB, rCLZB, 3 + add rRTN, rSTR, rCLZB + blr + +L(foundit): +#ifdef __LITTLE_ENDIAN__ + addi rTMP1, rTMP5, -1 + andc rTMP1, rTMP1, rTMP5 + cntlzw rCLZB, rTMP1 + subfic rCLZB, rCLZB, 32-7-32 + srawi rCLZB, rCLZB, 3 +#else + and rTMP1, r7F7F, rTMP3 + or rTMP4, r7F7F, rTMP3 + add rTMP1, rTMP1, r7F7F + nor rTMP2, rTMP4, rTMP1 + cntlzw rCLZB, rTMP2 + subi rSTR, rSTR, 4 + srwi rCLZB, rCLZB, 3 +#endif + add rRTN, rSTR, rCLZB + blr +END (strchr) + +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/strcmp.S new file mode 100644 index 0000000000..52c4bbc1f2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/strcmp.S @@ -0,0 +1,150 @@ +/* Optimized strcmp implementation for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */ + +EALIGN (strcmp, 4, 0) + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rWORD1 r5 /* current word in s1 */ +#define rWORD2 r6 /* current word in s2 */ +#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r8 /* constant 0x7f7f7f7f */ +#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rBITDIF r10 /* bits that differ in s1 & s2 words */ +#define rTMP r11 + + + or rTMP, rSTR2, rSTR1 + clrlwi. rTMP, rTMP, 30 + lis rFEFE, -0x101 + bne L(unaligned) + + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) + lis r7F7F, 0x7f7f + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + b L(g1) + +L(g0): lwzu rWORD1, 4(rSTR1) + bne cr1, L(different) + lwzu rWORD2, 4(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + and. rTMP, rTMP, rNEG + cmpw cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + andc rTMP2, rTMP2, rTMP + rlwimi rTMP2, rTMP2, 1, 0, 30 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rlwimi rTMP2, rWORD2, 24, 0, 7 + rlwimi rTMP, rWORD1, 24, 0, 7 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr+ + ori rRTN, rTMP2, 1 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rlwimi rTMP2, rWORD2, 24, 0, 7 + rlwimi rTMP, rWORD1, 24, 0, 7 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr+ + ori rRTN, rTMP2, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzw rBITDIF, rBITDIF + cntlzw rNEG, rNEG + addi rNEG, rNEG, 7 + cmpw cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + bgelr+ cr1 +L(equal): + li rRTN, 0 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + bgelr+ +L(highbit): + ori rRTN, rWORD2, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(unaligned): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + b L(u1) + +L(u0): lbzu rWORD1, 1(rSTR1) + bne- L(u4) + lbzu rWORD2, 1(rSTR2) +L(u1): cmpwi cr1, rWORD1, 0 + beq- cr1, L(u3) + cmpw rWORD1, rWORD2 + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + cmpwi cr1, rWORD1, 0 + cmpw rWORD1, rWORD2 + bne+ cr1, L(u0) +L(u3): sub rRTN, rWORD1, rWORD2 + blr +L(u4): lbz rWORD1, -1(rSTR1) + sub rRTN, rWORD1, rWORD2 + blr +END (strcmp) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/strcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/strcpy.S new file mode 100644 index 0000000000..c7af830dda --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/strcpy.S @@ -0,0 +1,117 @@ +/* Optimized strcpy implementation for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */ + +EALIGN (strcpy, 4, 0) + +#define rTMP r0 +#define rRTN r3 /* incoming DEST arg preserved as result */ +#define rSRC r4 /* pointer to previous word in src */ +#define rDEST r5 /* pointer to previous word in dest */ +#define rWORD r6 /* current word from src */ +#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r8 /* constant 0x7f7f7f7f */ +#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rALT r10 /* alternate word from src */ + + + or rTMP, rSRC, rRTN + clrlwi. rTMP, rTMP, 30 + addi rDEST, rRTN, -4 + bne L(unaligned) + + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + lwz rWORD, 0(rSRC) + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + b L(g2) + +L(g0): lwzu rALT, 4(rSRC) + stwu rWORD, 4(rDEST) + add rTMP, rFEFE, rALT + nor rNEG, r7F7F, rALT + and. rTMP, rTMP, rNEG + bne- L(g1) + lwzu rWORD, 4(rSRC) + stwu rALT, 4(rDEST) +L(g2): add rTMP, rFEFE, rWORD + nor rNEG, r7F7F, rWORD + and. rTMP, rTMP, rNEG + beq+ L(g0) + + mr rALT, rWORD +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(g1): +#ifdef __LITTLE_ENDIAN__ + rlwinm. rTMP, rALT, 0, 24, 31 + stb rALT, 4(rDEST) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stb rTMP, 5(rDEST) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stb rTMP, 6(rDEST) + beqlr- + rlwinm rTMP, rALT, 8, 24, 31 + stb rTMP, 7(rDEST) + blr +#else + rlwinm. rTMP, rALT, 8, 24, 31 + stb rTMP, 4(rDEST) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stb rTMP, 5(rDEST) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stb rTMP, 6(rDEST) + beqlr- + stb rALT, 7(rDEST) + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte copy. */ + .align 4 + nop +L(unaligned): + lbz rWORD, 0(rSRC) + addi rDEST, rRTN, -1 + cmpwi rWORD, 0 + beq- L(u2) + +L(u0): lbzu rALT, 1(rSRC) + stbu rWORD, 1(rDEST) + cmpwi rALT, 0 + beq- L(u1) + nop /* Let 601 load start of loop. */ + lbzu rWORD, 1(rSRC) + stbu rALT, 1(rDEST) + cmpwi rWORD, 0 + bne+ L(u0) +L(u2): stb rWORD, 1(rDEST) + blr +L(u1): stb rALT, 1(rDEST) + blr + +END (strcpy) +libc_hidden_builtin_def (strcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc32/strlen.S new file mode 100644 index 0000000000..fa245f0760 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/strlen.S @@ -0,0 +1,190 @@ +/* Optimized strlen implementation for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* The algorithm here uses the following techniques: + + 1) Given a word 'x', we can test to see if it contains any 0 bytes + by subtracting 0x01010101, and seeing if any of the high bits of each + byte changed from 0 to 1. This works because the least significant + 0 byte must have had no incoming carry (otherwise it's not the least + significant), so it is 0x00 - 0x01 == 0xff. For all other + byte values, either they have the high bit set initially, or when + 1 is subtracted you get a value in the range 0x00-0x7f, none of which + have their high bit set. The expression here is + (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when + there were no 0x00 bytes in the word. You get 0x80 in bytes that + match, but possibly false 0x80 matches in the next more significant + byte to a true match due to carries. For little-endian this is + of no consequence since the least significant match is the one + we're interested in, but big-endian needs method 2 to find which + byte matches. + + 2) Given a word 'x', we can test to see _which_ byte was zero by + calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f). + This produces 0x80 in each byte that was zero, and 0x00 in all + the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each + byte, and the '| x' part ensures that bytes with the high bit set + produce 0x00. The addition will carry into the high bit of each byte + iff that byte had one of its low 7 bits set. We can then just see + which was the most significant bit set and divide by 8 to find how + many to add to the index. + This is from the book 'The PowerPC Compiler Writer's Guide', + by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. + + We deal with strings not aligned to a word boundary by taking the + first word and ensuring that bytes not part of the string + are treated as nonzero. To allow for memory latency, we unroll the + loop a few times, being careful to ensure that we do not read ahead + across cache line boundaries. + + Questions to answer: + 1) How long are strings passed to strlen? If they're often really long, + we should probably use cache management instructions and/or unroll the + loop more. If they're often quite short, it might be better to use + fact (2) in the inner loop than have to recalculate it. + 2) How popular are bytes with the high bit set? If they are very rare, + on some processors it might be useful to use the simpler expression + ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one + ALU), but this fails when any character has its high bit set. */ + +/* Some notes on register usage: Under the SVR4 ABI, we can use registers + 0 and 3 through 12 (so long as we don't call any procedures) without + saving them. We can also use registers 14 through 31 if we save them. + We can't use r1 (it's the stack pointer), r2 nor r13 because the user + program may expect them to hold their usual value if we get sent + a signal. Integer parameters are passed in r3 through r10. + We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving + them, the others we must save. */ + +/* int [r3] strlen (char *s [r3]) */ + +ENTRY (strlen) + +#define rTMP4 r0 +#define rRTN r3 /* incoming STR arg, outgoing result */ +#define rSTR r4 /* current string position */ +#define rPADN r5 /* number of padding bits we prepend to the + string to make it start at a word boundary */ +#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r7 /* constant 0x7f7f7f7f */ +#define rWORD1 r8 /* current string word */ +#define rWORD2 r9 /* next string word */ +#define rMASK r9 /* mask for first string word */ +#define rTMP1 r10 +#define rTMP2 r11 +#define rTMP3 r12 + + + clrrwi rSTR, rRTN, 2 + lis r7F7F, 0x7f7f + rlwinm rPADN, rRTN, 3, 27, 28 + lwz rWORD1, 0(rSTR) + li rMASK, -1 + addi r7F7F, r7F7F, 0x7f7f +/* We use method (2) on the first two words, because rFEFE isn't + required which reduces setup overhead. Also gives a faster return + for small strings on big-endian due to needing to recalculate with + method (2) anyway. */ +#ifdef __LITTLE_ENDIAN__ + slw rMASK, rMASK, rPADN +#else + srw rMASK, rMASK, rPADN +#endif + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + add rTMP1, rTMP1, r7F7F + nor rTMP3, rTMP2, rTMP1 + and. rTMP3, rTMP3, rMASK + mtcrf 0x01, rRTN + bne L(done0) + lis rFEFE, -0x101 + addi rFEFE, rFEFE, -0x101 +/* Are we now aligned to a doubleword boundary? */ + bt 29, L(loop) + +/* Handle second word of pair. */ +/* Perhaps use method (1) here for little-endian, saving one instruction? */ + lwzu rWORD1, 4(rSTR) + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + add rTMP1, rTMP1, r7F7F + nor. rTMP3, rTMP2, rTMP1 + bne L(done0) + +/* The loop. */ + +L(loop): + lwz rWORD1, 4(rSTR) + lwzu rWORD2, 8(rSTR) + add rTMP1, rFEFE, rWORD1 + nor rTMP2, r7F7F, rWORD1 + and. rTMP1, rTMP1, rTMP2 + add rTMP3, rFEFE, rWORD2 + nor rTMP4, r7F7F, rWORD2 + bne L(done1) + and. rTMP3, rTMP3, rTMP4 + beq L(loop) + +#ifndef __LITTLE_ENDIAN__ + and rTMP1, r7F7F, rWORD2 + add rTMP1, rTMP1, r7F7F + andc rTMP3, rTMP4, rTMP1 + b L(done0) + +L(done1): + and rTMP1, r7F7F, rWORD1 + subi rSTR, rSTR, 4 + add rTMP1, rTMP1, r7F7F + andc rTMP3, rTMP2, rTMP1 + +/* When we get to here, rSTR points to the first word in the string that + contains a zero byte, and rTMP3 has 0x80 for bytes that are zero, + and 0x00 otherwise. */ +L(done0): + cntlzw rTMP3, rTMP3 + subf rTMP1, rRTN, rSTR + srwi rTMP3, rTMP3, 3 + add rRTN, rTMP1, rTMP3 + blr +#else + +L(done0): + addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */ + andc rTMP1, rTMP1, rTMP3 + cntlzw rTMP1, rTMP1 /* Count bits not in the mask. */ + subf rTMP3, rRTN, rSTR + subfic rTMP1, rTMP1, 32-7 + srwi rTMP1, rTMP1, 3 + add rRTN, rTMP1, rTMP3 + blr + +L(done1): + addi rTMP3, rTMP1, -1 + andc rTMP3, rTMP3, rTMP1 + cntlzw rTMP3, rTMP3 + subf rTMP1, rRTN, rSTR + subfic rTMP3, rTMP3, 32-7-32 + srawi rTMP3, rTMP3, 3 + add rRTN, rTMP1, rTMP3 + blr +#endif + +END (strlen) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/strncmp.S new file mode 100644 index 0000000000..dadc90d661 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/strncmp.S @@ -0,0 +1,181 @@ +/* Optimized strcmp implementation for PowerPC32. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ + +EALIGN (strncmp, 4, 0) + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rFEFE r8 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 + lis r7F7F, 0x7f7f + dcbt 0,rSTR2 + clrlwi. rTMP, rTMP, 30 + cmplwi cr1, rN, 0 + lis rFEFE, -0x101 + bne L(unaligned) +/* We are word aligned so set up for two loops. first a word + loop, then fall into the byte loop if any residual. */ + srwi. rTMP, rN, 2 + clrlwi rN, rN, 30 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + cmplwi cr1, rN, 0 + beq L(unaligned) + + mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */ + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) + b L(g1) + +L(g0): + lwzu rWORD1, 4(rSTR1) + bne- cr1, L(different) + lwzu rWORD2, 4(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + bdz L(tail) + and. rTMP, rTMP, rNEG + cmpw cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + +#ifdef __LITTLE_ENDIAN__ +L(endstring): + slwi rTMP, rTMP, 1 + addi rTMP2, rTMP, -1 + andc rTMP2, rTMP2, rTMP + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rlwimi rTMP2, rWORD2, 24, 0, 7 + rlwimi rTMP, rWORD1, 24, 0, 7 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr+ + ori rRTN, rTMP2, 1 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rlwimi rTMP2, rWORD2, 24, 0, 7 + rlwimi rTMP, rWORD1, 24, 0, 7 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr+ + ori rRTN, rTMP2, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzw rBITDIF, rBITDIF + cntlzw rNEG, rNEG + addi rNEG, rNEG, 7 + cmpw cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + bgelr+ cr1 +L(equal): + li rRTN, 0 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + bgelr+ +L(highbit): + ori rRTN, rWORD2, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. rTMP, rTMP, rNEG + cmpw cr1, rWORD1, rWORD2 + bne- L(endstring) + addi rSTR1, rSTR1, 4 + bne- cr1, L(different) + addi rSTR2, rSTR2, 4 + cmplwi cr1, rN, 0 +L(unaligned): + mtctr rN /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(uz) +L(ux): + li rRTN, 0 + blr + .align 4 +L(uz): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + nop + b L(u1) +L(u0): + lbzu rWORD2, 1(rSTR2) +L(u1): + bdz L(u3) + cmpwi cr1, rWORD1, 0 + cmpw rWORD1, rWORD2 + beq- cr1, L(u3) + lbzu rWORD1, 1(rSTR1) + bne- L(u2) + lbzu rWORD2, 1(rSTR2) + bdz L(u3) + cmpwi cr1, rWORD1, 0 + cmpw rWORD1, rWORD2 + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + bne+ cr1, L(u0) + +L(u2): lbzu rWORD1, -1(rSTR1) +L(u3): sub rRTN, rWORD1, rWORD2 + blr +END (strncmp) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/sub_n.S b/REORG.TODO/sysdeps/powerpc/powerpc32/sub_n.S new file mode 100644 index 0000000000..659f348079 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/sub_n.S @@ -0,0 +1,68 @@ +/* Subtract two limb vectors of equal, non-zero length for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, + mp_size_t size) + Calculate s1-s2 and put result in res_ptr; return borrow, 0 or 1. */ + +/* Note on optimisation: This code is optimal for the 601. Almost every other + possible 2-unrolled inner loop will not be. Also, watch out for the + alignment... */ + +EALIGN (__mpn_sub_n, 3, 1) + +/* Set up for loop below. */ + mtcrf 0x01,r6 + srwi. r7,r6,1 + mtctr r7 + bt 31,L(2) + +/* Set the carry (clear the borrow). */ + subfc r0,r0,r0 +/* Adjust pointers for loop. */ + addi r3,r3,-4 + addi r4,r4,-4 + addi r5,r5,-4 + b L(0) + +L(2): lwz r7,0(r5) + lwz r6,0(r4) + subfc r6,r7,r6 + stw r6,0(r3) + beq L(1) + +/* Align start of loop to an odd word boundary to guarantee that the + last two words can be fetched in one access (for 601). This turns + out to be important. */ +L(0): + lwz r9,4(r4) + lwz r8,4(r5) + lwzu r6,8(r4) + lwzu r7,8(r5) + subfe r8,r8,r9 + stw r8,4(r3) + subfe r6,r7,r6 + stwu r6,8(r3) + bdnz L(0) +/* Return the borrow. */ +L(1): subfe r3,r3,r3 + neg r3,r3 + blr +END (__mpn_sub_n) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/submul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc32/submul_1.S new file mode 100644 index 0000000000..c1183c40f9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/submul_1.S @@ -0,0 +1,51 @@ +/* Multiply a limb vector by a single limb, for PowerPC. + Copyright (C) 1993-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, + mp_size_t s1_size, mp_limb_t s2_limb) + Calculate res-s1*s2 and put result back in res; return carry. */ + +ENTRY (__mpn_submul_1) + mtctr r5 + + lwz r0,0(r4) + mullw r7,r0,r6 + mulhwu r10,r0,r6 + lwz r9,0(r3) + subf r8,r7,r9 + addc r7,r7,r8 # invert cy (r7 is junk) + addi r3,r3,-4 # adjust res_ptr + bdz L(1) + +L(0): lwzu r0,4(r4) + stwu r8,4(r3) + mullw r8,r0,r6 + adde r7,r8,r10 + mulhwu r10,r0,r6 + lwz r9,4(r3) + addze r10,r10 + subf r8,r7,r9 + addc r7,r7,r8 # invert cy (r7 is junk) + bdnz L(0) + +L(1): stw r8,4(r3) + addze r3,r10 + blr +END (__mpn_submul_1) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/symbol-hacks.h b/REORG.TODO/sysdeps/powerpc/powerpc32/symbol-hacks.h new file mode 100644 index 0000000000..dbb3141621 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/symbol-hacks.h @@ -0,0 +1,21 @@ +/* Hacks needed for symbol manipulation. powerpc version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/wordsize-32/divdi3-symbol-hacks.h> + +#include_next "symbol-hacks.h" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/sysdep.h b/REORG.TODO/sysdeps/powerpc/powerpc32/sysdep.h new file mode 100644 index 0000000000..f92ab2cded --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/sysdep.h @@ -0,0 +1,174 @@ +/* Assembly macros for 32-bit PowerPC. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/sysdep.h> + +#ifdef __ASSEMBLER__ + +/* If compiled for profiling, call `_mcount' at the start of each + function. */ +#ifdef PROF +/* The mcount code relies on a the return address being on the stack + to locate our caller and so it can restore it; so store one just + for its benefit. */ +# define CALL_MCOUNT \ + mflr r0; \ + stw r0,4(r1); \ + cfi_offset (lr, 4); \ + bl JUMPTARGET(_mcount); +#else /* PROF */ +# define CALL_MCOUNT /* Do nothing. */ +#endif /* PROF */ + +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),@function; \ + .align ALIGNARG(2); \ + C_LABEL(name) \ + cfi_startproc; \ + CALL_MCOUNT + +/* helper macro for accessing the 32-bit powerpc GOT. */ + +#define SETUP_GOT_ACCESS(regname,GOT_LABEL) \ + bcl 20,31,GOT_LABEL ; \ +GOT_LABEL: ; \ + mflr (regname) + +#define EALIGN_W_0 /* No words to insert. */ +#define EALIGN_W_1 nop +#define EALIGN_W_2 nop;nop +#define EALIGN_W_3 nop;nop;nop +#define EALIGN_W_4 EALIGN_W_3;nop +#define EALIGN_W_5 EALIGN_W_4;nop +#define EALIGN_W_6 EALIGN_W_5;nop +#define EALIGN_W_7 EALIGN_W_6;nop + +/* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes + past a 2^align boundary. */ +#ifdef PROF +# define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),@function; \ + .align ALIGNARG(2); \ + C_LABEL(name) \ + cfi_startproc; \ + CALL_MCOUNT \ + b 0f; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + 0: +#else /* PROF */ +# define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(name) \ + cfi_startproc; +#endif + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(name) + +#if ! IS_IN(rtld) && defined (ENABLE_LOCK_ELISION) +# define ABORT_TRANSACTION \ + cmpwi 2,0; \ + beq 1f; \ + lwz 0,TM_CAPABLE(2); \ + cmpwi 0,0; \ + beq 1f; \ + li 11,_ABORT_SYSCALL; \ + tabort. 11; \ + .align 4; \ +1: +#else +# define ABORT_TRANSACTION +#endif + +#define DO_CALL(syscall) \ + ABORT_TRANSACTION \ + li 0,syscall; \ + sc + +#undef JUMPTARGET +#ifdef PIC +# define JUMPTARGET(name) name##@plt +#else +# define JUMPTARGET(name) name +#endif + +#if defined SHARED && defined PIC && !defined NO_HIDDEN +# undef HIDDEN_JUMPTARGET +# define HIDDEN_JUMPTARGET(name) __GI_##name##@local +#endif + +#define PSEUDO(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#define PSEUDO_RET \ + bnslr+; \ + b __syscall_error@local +#define ret PSEUDO_RET + +#undef PSEUDO_END +#define PSEUDO_END(name) \ + END (name) + +#define PSEUDO_NOERRNO(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#define PSEUDO_RET_NOERRNO \ + blr +#define ret_NOERRNO PSEUDO_RET_NOERRNO + +#undef PSEUDO_END_NOERRNO +#define PSEUDO_END_NOERRNO(name) \ + END (name) + +#define PSEUDO_ERRVAL(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#define PSEUDO_RET_ERRVAL \ + blr +#define ret_ERRVAL PSEUDO_RET_ERRVAL + +#undef PSEUDO_END_ERRVAL +#define PSEUDO_END_ERRVAL(name) \ + END (name) + +/* Local labels stripped out by the linker. */ +#undef L +#define L(x) .L##x + +#define XGLUE(a,b) a##b +#define GLUE(a,b) XGLUE (a,b) +#define GENERATE_GOT_LABEL(name) GLUE (.got_label, name) + +/* Label in text section. */ +#define C_TEXT(name) name + +#endif /* __ASSEMBLER__ */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/tls-macros.h b/REORG.TODO/sysdeps/powerpc/powerpc32/tls-macros.h new file mode 100644 index 0000000000..ee0eac4858 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/tls-macros.h @@ -0,0 +1,49 @@ +/* Include sysdeps/powerpc/tls-macros.h for __TLS_CALL_CLOBBERS */ +#include_next "tls-macros.h" + +/* PowerPC32 Local Exec TLS access. */ +#define TLS_LE(x) \ + ({ int *__result; \ + asm ("addi %0,2," #x "@tprel" \ + : "=r" (__result)); \ + __result; }) + +/* PowerPC32 Initial Exec TLS access. */ +#define TLS_IE(x) \ + ({ int *__result; \ + asm ("bcl 20,31,1f\n1:\t" \ + "mflr %0\n\t" \ + "addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \ + "addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \ + "lwz %0," #x "@got@tprel(%0)\n\t" \ + "add %0,%0," #x "@tls" \ + : "=b" (__result) : \ + : "lr"); \ + __result; }) + +/* PowerPC32 Local Dynamic TLS access. */ +#define TLS_LD(x) \ + ({ int *__result; \ + asm ("bcl 20,31,1f\n1:\t" \ + "mflr 3\n\t" \ + "addis 3,3,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \ + "addi 3,3,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \ + "addi 3,3," #x "@got@tlsld\n\t" \ + "bl __tls_get_addr@plt\n\t" \ + "addi %0,3," #x "@dtprel" \ + : "=r" (__result) : \ + : "3", __TLS_CALL_CLOBBERS); \ + __result; }) + +/* PowerPC32 General Dynamic TLS access. */ +#define TLS_GD(x) \ + ({ register int *__result __asm__ ("r3"); \ + asm ("bcl 20,31,1f\n1:\t" \ + "mflr 3\n\t" \ + "addis 3,3,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \ + "addi 3,3,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \ + "addi 3,3," #x "@got@tlsgd\n\t" \ + "bl __tls_get_addr@plt" \ + : "=r" (__result) : \ + : __TLS_CALL_CLOBBERS); \ + __result; }) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/tst-audit.h b/REORG.TODO/sysdeps/powerpc/powerpc32/tst-audit.h new file mode 100644 index 0000000000..2e5e0c91d5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/tst-audit.h @@ -0,0 +1,25 @@ +/* Definitions for testing PLT entry/exit auditing. PowerPC32 version. + + Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define pltenter la_ppc32_gnu_pltenter +#define pltexit la_ppc32_gnu_pltexit +#define La_regs La_ppc32_regs +#define La_retval La_ppc32_retval +#define int_retval lrv_r3 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/970/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/970/Implies new file mode 100644 index 0000000000..bedb20b65c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/970/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power4/fpu +powerpc/powerpc64/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/Implies new file mode 100644 index 0000000000..a8cae95f9d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/Implies @@ -0,0 +1 @@ +wordsize-64 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/Makefile new file mode 100644 index 0000000000..9d15db0328 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/Makefile @@ -0,0 +1,49 @@ +# Powerpc64 specific build options. +# this is ./sysdeps/powerpc/powerpc64/Makefile + +# Each TOC entry takes 8 bytes and the TOC holds up to 2^16 bytes, +# or 8192 entries. +# If -fpic is not specified, the latest gcc-3.2.1 now generates +# different code for call stubs (without the TOC reload). +# Shared objects need the TOC reload so specify -fpic. +ifeq (yes,$(build-shared)) +pic-ccflag = -fpic +endif + +# These flags prevent FPU or Altivec registers from being used, +# for code called in contexts that is not allowed to touch those registers. +# Stupid GCC requires us to pass all these ridiculous switches. We need to +# pass the -mno-* switches as well to prevent the compiler from attempting +# to emit altivec or vsx instructions, especially when the registers aren't +# available. +no-special-regs := $(sort $(foreach n,40 41 50 51 60 61 62 63 \ + $(foreach m,2 3 4 5 6 7 8 9, \ + 3$m 4$m 5$m),\ + -ffixed-$n)) \ + $(sort $(foreach n,$(foreach m,0 1 2 3 4 5 6 7 8 9,\ + $m 1$m 2$m) 30 31,\ + -ffixed-v$n)) \ + -ffixed-vrsave -ffixed-vscr -mno-altivec -mno-vsx + +# Need to prevent gcc from using fprs in code used during dynamic linking. + +CFLAGS-dl-runtime.os = $(no-special-regs) +CFLAGS-dl-lookup.os = $(no-special-regs) +CFLAGS-dl-misc.os = $(no-special-regs) +CFLAGS-rtld-mempcpy.os = $(no-special-regs) +CFLAGS-rtld-memmove.os = $(no-special-regs) +CFLAGS-rtld-memchr.os = $(no-special-regs) +CFLAGS-rtld-strnlen.os = $(no-special-regs) + +ifeq ($(subdir),elf) +# help gcc inline asm code from dl-machine.h ++cflags += -finline-limit=2000 +endif + +ifeq ($(subdir),gmon) +# The assembly functions assume that fp arg regs are not trashed. +# Compiling with -msoft-float ensures that fp regs are not used +# for moving memory around. +CFLAGS-mcount.c += $(no-special-regs) +sysdep_routines += ppc-mcount +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp-common.S new file mode 100644 index 0000000000..efda025b41 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp-common.S @@ -0,0 +1,183 @@ +/* longjmp for PowerPC64. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#define _SETJMP_H +#ifdef __NO_VMX__ +# include <novmxsetjmp.h> +#else +# include <jmpbuf-offsets.h> +#endif + +#ifndef __NO_VMX__ + .section ".toc","aw" +.LC__dl_hwcap: +# ifdef SHARED +# if IS_IN (rtld) + /* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ + .tc _rtld_local_ro[TC],_rtld_local_ro +# else + .tc _rtld_global_ro[TC],_rtld_global_ro +# endif +# else + .tc _dl_hwcap[TC],_dl_hwcap +# endif + .section ".text" +#endif + + .machine "altivec" +ENTRY (__longjmp) + CALL_MCOUNT 2 +#ifndef __NO_VMX__ + ld r5,.LC__dl_hwcap@toc(r2) +# ifdef SHARED + /* Load _rtld-global._dl_hwcap. */ + ld r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5) +# else + ld r5,0(r5) /* Load extern _dl_hwcap. */ +# endif + andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(no_vmx) + la r5,((JB_VRS)*8)(3) + andi. r6,r5,0xf + lwz r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */ + mtspr VRSAVE,r0 + beq+ L(aligned_restore_vmx) + addi r6,r5,16 + lvsl v0,0,r5 + lvx v1,0,r5 + addi r5,r5,32 + lvx v21,0,r6 + vperm v20,v1,v21,v0 +# define load_misaligned_vmx_lo_loaded(loadvr,lovr,shiftvr,loadgpr,addgpr) \ + addi addgpr,addgpr,32; \ + lvx lovr,0,loadgpr; \ + vperm loadvr,loadvr,lovr,shiftvr; + load_misaligned_vmx_lo_loaded(v21,v22,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v22,v23,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v23,v24,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v24,v25,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v25,v26,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v26,v27,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v27,v28,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v28,v29,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v29,v30,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v30,v31,v0,r6,r5) + lvx v1,0,r5 + vperm v31,v31,v1,v0 + b L(no_vmx) +L(aligned_restore_vmx): + addi r6,r5,16 + lvx v20,0,r5 + addi r5,r5,32 + lvx v21,0,r6 + addi r6,r6,32 + lvx v22,0,r5 + addi r5,r5,32 + lvx v23,0,r6 + addi r6,r6,32 + lvx v24,0,r5 + addi r5,r5,32 + lvx v25,0,r6 + addi r6,r6,32 + lvx v26,0,r5 + addi r5,r5,32 + lvx v27,0,r6 + addi r6,r6,32 + lvx v28,0,r5 + addi r5,r5,32 + lvx v29,0,r6 + addi r6,r6,32 + lvx v30,0,r5 + lvx v31,0,r6 +L(no_vmx): +#endif +#if defined PTR_DEMANGLE || defined CHECK_SP + ld r22,(JB_GPR1*8)(r3) +#else + ld r1,(JB_GPR1*8)(r3) +#endif +#ifdef PTR_DEMANGLE +# ifdef CHECK_SP + PTR_DEMANGLE3 (r22, r22, r25) +# else + PTR_DEMANGLE3 (r1, r22, r25) +# endif +#endif +#ifdef CHECK_SP + CHECK_SP (r22) + mr r1,r22 +#endif + ld r2,(JB_GPR2*8)(r3) + ld r0,(JB_LR*8)(r3) + ld r14,((JB_GPRS+0)*8)(r3) + lfd fp14,((JB_FPRS+0)*8)(r3) +#if defined SHARED && !IS_IN (rtld) + std r2,FRAME_TOC_SAVE(r1) /* Restore the callers TOC save area. */ +#endif + ld r15,((JB_GPRS+1)*8)(r3) + lfd fp15,((JB_FPRS+1)*8)(r3) + ld r16,((JB_GPRS+2)*8)(r3) + lfd fp16,((JB_FPRS+2)*8)(r3) + ld r17,((JB_GPRS+3)*8)(r3) + lfd fp17,((JB_FPRS+3)*8)(r3) + ld r18,((JB_GPRS+4)*8)(r3) + lfd fp18,((JB_FPRS+4)*8)(r3) + ld r19,((JB_GPRS+5)*8)(r3) + lfd fp19,((JB_FPRS+5)*8)(r3) + ld r20,((JB_GPRS+6)*8)(r3) + lfd fp20,((JB_FPRS+6)*8)(r3) +#ifdef PTR_DEMANGLE + PTR_DEMANGLE2 (r0, r25) +#endif + /* longjmp/longjmp_target probe expects longjmp first argument (8@3), + second argument (-4@4), and target address (8@0), respectively. */ + LIBC_PROBE (longjmp, 3, 8@3, -4@4, 8@0) + mtlr r0 +/* std r2,FRAME_TOC_SAVE(r1) Restore the TOC save area. */ + ld r21,((JB_GPRS+7)*8)(r3) + lfd fp21,((JB_FPRS+7)*8)(r3) + ld r22,((JB_GPRS+8)*8)(r3) + lfd fp22,((JB_FPRS+8)*8)(r3) + lwz r5,((JB_CR*8)+4)(r3) /* 32-bit CR. */ + ld r23,((JB_GPRS+9)*8)(r3) + lfd fp23,((JB_FPRS+9)*8)(r3) + ld r24,((JB_GPRS+10)*8)(r3) + lfd fp24,((JB_FPRS+10)*8)(r3) + ld r25,((JB_GPRS+11)*8)(r3) + lfd fp25,((JB_FPRS+11)*8)(r3) + mtcrf 0xFF,r5 + ld r26,((JB_GPRS+12)*8)(r3) + lfd fp26,((JB_FPRS+12)*8)(r3) + ld r27,((JB_GPRS+13)*8)(r3) + lfd fp27,((JB_FPRS+13)*8)(r3) + ld r28,((JB_GPRS+14)*8)(r3) + lfd fp28,((JB_FPRS+14)*8)(r3) + ld r29,((JB_GPRS+15)*8)(r3) + lfd fp29,((JB_FPRS+15)*8)(r3) + ld r30,((JB_GPRS+16)*8)(r3) + lfd fp30,((JB_FPRS+16)*8)(r3) + ld r31,((JB_GPRS+17)*8)(r3) + lfd fp31,((JB_FPRS+17)*8)(r3) + LIBC_PROBE (longjmp_target, 3, 8@3, -4@4, 8@0) + mr r3,r4 + blr +END (__longjmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp.S new file mode 100644 index 0000000000..78659d012f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp.S @@ -0,0 +1,39 @@ +/* AltiVec/VMX (new) version of __longjmp for PowerPC64. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libc-symbols.h> +#include <rtld-global-offsets.h> +#include <shlib-compat.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +# include "__longjmp-common.S" + +#else /* IS_IN (libc) */ +strong_alias (__vmx__longjmp, __longjmp) +# define __longjmp __vmx__longjmp +# include "__longjmp-common.S" + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_3, GLIBC_2_3_4) +# define __NO_VMX__ +# undef __longjmp +# undef JB_SIZE +# define __longjmp __novmx__longjmp +# include "__longjmp-common.S" +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/a2/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/a2/memcpy.S new file mode 100644 index 0000000000..ff30898df5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/a2/memcpy.S @@ -0,0 +1,528 @@ +/* Optimized memcpy implementation for PowerPC A2. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Michael Brutman <brutman@us.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +#define PREFETCH_AHEAD 4 /* no cache lines SRC prefetching ahead */ +#define ZERO_AHEAD 2 /* no cache lines DST zeroing ahead */ + + .section ".toc","aw" +.LC0: + .tc __cache_line_size[TC],__cache_line_size + .section ".text" + .align 2 + + + .machine a2 +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + dcbt 0,r4 /* Prefetch ONE SRC cacheline */ + cmpldi cr1,r5,16 /* is size < 16 ? */ + mr r6,r3 /* Copy dest reg to r6; */ + blt+ cr1,L(shortcopy) + + + /* Big copy (16 bytes or more) + + Figure out how far to the nearest quadword boundary, or if we are + on one already. Also get the cache line size. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + */ + + neg r8,r3 /* LS 4 bits = # bytes to 8-byte dest bdry */ + ld r9,.LC0@toc(r2) /* Get cache line size (part 1) */ + clrldi r8,r8,64-4 /* align to 16byte boundary */ + sub r7,r4,r3 /* compute offset to src from dest */ + lwz r9,0(r9) /* Get cache line size (part 2) */ + cmpldi cr0,r8,0 /* Were we aligned on a 16 byte bdy? */ + addi r10,r9,-1 /* Cache line mask */ + beq+ L(dst_aligned) + + + + /* Destination is not aligned on quadword boundary. Get us to one. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + r7 - offset to src from dest + r8 - number of bytes to quadword boundary + */ + + mtcrf 0x01,r8 /* put #bytes to boundary into cr7 */ + subf r5,r8,r5 /* adjust remaining len */ + + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte addr */ + stb r0,0(r6) + addi r6,r6,1 +1: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte addr */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte addr */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+0,8f + ldx r0,r7,r6 /* copy 8 byte addr */ + std r0,0(r6) + addi r6,r6,8 +8: + add r4,r7,r6 /* update src addr */ + + + + /* Dest is quadword aligned now. + + Lots of decisions to make. If we are copying less than a cache + line we won't be here long. If we are not on a cache line + boundary we need to get there. And then we need to figure out + how many cache lines ahead to pre-touch. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + */ + + + .align 4 +L(dst_aligned): + cmpdi cr0,r9,0 /* Cache line size set? */ + bne+ cr0,L(cachelineset) + +/* __cache_line_size not set: generic byte copy without much optimization */ + clrldi. r0,r5,63 /* If length is odd copy one byte */ + beq L(cachelinenotset_align) + lbz r7,0(r4) /* Read one byte from source */ + addi r5,r5,-1 /* Update length */ + addi r4,r4,1 /* Update source pointer address */ + stb r7,0(r6) /* Store one byte at dest */ + addi r6,r6,1 /* Update dest pointer address */ +L(cachelinenotset_align): + cmpdi cr7,r5,0 /* If length is 0 return */ + beqlr cr7 + ori r2,r2,0 /* Force a new dispatch group */ +L(cachelinenotset_loop): + addic. r5,r5,-2 /* Update length */ + lbz r7,0(r4) /* Load 2 bytes from source */ + lbz r8,1(r4) + addi r4,r4,2 /* Update source pointer address */ + stb r7,0(r6) /* Store 2 bytes on dest */ + stb r8,1(r6) + addi r6,r6,2 /* Update dest pointer address */ + bne L(cachelinenotset_loop) + blr + + +L(cachelineset): + cmpd cr5,r5,r10 /* Less than a cacheline to go? */ + + neg r7,r6 /* How far to next cacheline bdy? */ + + addi r6,r6,-8 /* prepare for stdu */ + cmpdi cr0,r9,128 + addi r4,r4,-8 /* prepare for ldu */ + + + ble+ cr5,L(lessthancacheline) + + beq- cr0,L(big_lines) /* 128 byte line code */ + + + + /* More than a cacheline left to go, and using 64 byte cachelines */ + + clrldi r7,r7,64-6 /* How far to next cacheline bdy? */ + + cmpldi cr6,r7,0 /* Are we on a cacheline bdy already? */ + + /* Reduce total len by what it takes to get to the next cache line */ + subf r5,r7,r5 + srdi r7,r7,4 /* How many qws to get to the line bdy? */ + + /* How many full cache lines to copy after getting to a line bdy? */ + srdi r10,r5,6 + + cmpldi r10,0 /* If no full cache lines to copy ... */ + li r11,0 /* number cachelines to copy with prefetch */ + beq L(nocacheprefetch) + + + /* We are here because we have at least one full cache line to copy, + and therefore some pre-touching to do. */ + + cmpldi r10,PREFETCH_AHEAD + li r12,64+8 /* prefetch distance */ + ble L(lessthanmaxprefetch) + + /* We can only do so much pre-fetching. R11 will have the count of + lines left to prefetch after the initial batch of prefetches + are executed. */ + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +L(lessthanmaxprefetch): + mtctr r10 + + /* At this point r10/ctr hold the number of lines to prefetch in this + initial batch, and r11 holds any remainder. */ + +L(prefetchSRC): + dcbt r12,r4 + addi r12,r12,64 + bdnz L(prefetchSRC) + + + /* Prefetching is done, or was not needed. + + cr6 - are we on a cacheline boundary already? + r7 - number of quadwords to the next cacheline boundary + */ + +L(nocacheprefetch): + mtctr r7 + + cmpldi cr1,r5,64 /* Less than a cache line to copy? */ + + /* How many bytes are left after we copy whatever full + cache lines we can get? */ + clrldi r5,r5,64-6 + + beq cr6,L(cachelinealigned) + + + /* Copy quadwords up to the next cacheline boundary */ + +L(aligntocacheline): + ld r9,0x08(r4) + ld r7,0x10(r4) + addi r4,r4,0x10 + std r9,0x08(r6) + stdu r7,0x10(r6) + bdnz L(aligntocacheline) + + + .align 4 +L(cachelinealigned): /* copy while cache lines */ + + blt- cr1,L(lessthancacheline) /* size <64 */ + +L(outerloop): + cmpdi r11,0 + mtctr r11 + beq- L(endloop) + + li r11,64*ZERO_AHEAD +8 /* DCBZ dist */ + + .align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +L(loop): /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + ld r9, 0x08(r4) + dcbz r11,r6 + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + addi r4, r4,0x40 + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + stdu r0, 0x40(r6) + + bdnz L(loop) + + +L(endloop): + cmpdi r10,0 + beq- L(endloop2) + mtctr r10 + +L(loop2): /* Copy aligned body */ + ld r9, 0x08(r4) + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + addi r4, r4,0x40 + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + stdu r0, 0x40(r6) + + bdnz L(loop2) +L(endloop2): + + + .align 4 +L(lessthancacheline): /* Was there less than cache to do ? */ + cmpldi cr0,r5,16 + srdi r7,r5,4 /* divide size by 16 */ + blt- L(do_lt16) + mtctr r7 + +L(copy_remaining): + ld r8,0x08(r4) + ld r7,0x10(r4) + addi r4,r4,0x10 + std r8,0x08(r6) + stdu r7,0x10(r6) + bdnz L(copy_remaining) + +L(do_lt16): /* less than 16 ? */ + cmpldi cr0,r5,0 /* copy remaining bytes (0-15) */ + beqlr+ /* no rest to copy */ + addi r4,r4,8 + addi r6,r6,8 + +L(shortcopy): /* SIMPLE COPY to handle size =< 15 bytes */ + mtcrf 0x01,r5 + sub r7,r4,r6 + bf- cr7*4+0,8f + ldx r0,r7,r6 /* copy 8 byte */ + std r0,0(r6) + addi r6,r6,8 +8: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) +1: + blr + + + + + + /* Similar to above, but for use with 128 byte lines. */ + + +L(big_lines): + + clrldi r7,r7,64-7 /* How far to next cacheline bdy? */ + + cmpldi cr6,r7,0 /* Are we on a cacheline bdy already? */ + + /* Reduce total len by what it takes to get to the next cache line */ + subf r5,r7,r5 + srdi r7,r7,4 /* How many qws to get to the line bdy? */ + + /* How many full cache lines to copy after getting to a line bdy? */ + srdi r10,r5,7 + + cmpldi r10,0 /* If no full cache lines to copy ... */ + li r11,0 /* number cachelines to copy with prefetch */ + beq L(nocacheprefetch_128) + + + /* We are here because we have at least one full cache line to copy, + and therefore some pre-touching to do. */ + + cmpldi r10,PREFETCH_AHEAD + li r12,128+8 /* prefetch distance */ + ble L(lessthanmaxprefetch_128) + + /* We can only do so much pre-fetching. R11 will have the count of + lines left to prefetch after the initial batch of prefetches + are executed. */ + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +L(lessthanmaxprefetch_128): + mtctr r10 + + /* At this point r10/ctr hold the number of lines to prefetch in this + initial batch, and r11 holds any remainder. */ + +L(prefetchSRC_128): + dcbt r12,r4 + addi r12,r12,128 + bdnz L(prefetchSRC_128) + + + /* Prefetching is done, or was not needed. + + cr6 - are we on a cacheline boundary already? + r7 - number of quadwords to the next cacheline boundary + */ + +L(nocacheprefetch_128): + mtctr r7 + + cmpldi cr1,r5,128 /* Less than a cache line to copy? */ + + /* How many bytes are left after we copy whatever full + cache lines we can get? */ + clrldi r5,r5,64-7 + + beq cr6,L(cachelinealigned_128) + + + /* Copy quadwords up to the next cacheline boundary */ + +L(aligntocacheline_128): + ld r9,0x08(r4) + ld r7,0x10(r4) + addi r4,r4,0x10 + std r9,0x08(r6) + stdu r7,0x10(r6) + bdnz L(aligntocacheline_128) + + +L(cachelinealigned_128): /* copy while cache lines */ + + blt- cr1,L(lessthancacheline) /* size <128 */ + +L(outerloop_128): + cmpdi r11,0 + mtctr r11 + beq- L(endloop_128) + + li r11,128*ZERO_AHEAD +8 /* DCBZ dist */ + + .align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +L(loop_128): /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + ld r9, 0x08(r4) + dcbz r11,r6 + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + std r0, 0x40(r6) + ld r9, 0x48(r4) + ld r7, 0x50(r4) + ld r8, 0x58(r4) + ld r0, 0x60(r4) + std r9, 0x48(r6) + std r7, 0x50(r6) + std r8, 0x58(r6) + std r0, 0x60(r6) + ld r9, 0x68(r4) + ld r7, 0x70(r4) + ld r8, 0x78(r4) + ld r0, 0x80(r4) + addi r4, r4,0x80 + std r9, 0x68(r6) + std r7, 0x70(r6) + std r8, 0x78(r6) + stdu r0, 0x80(r6) + + bdnz L(loop_128) + + +L(endloop_128): + cmpdi r10,0 + beq- L(endloop2_128) + mtctr r10 + +L(loop2_128): /* Copy aligned body */ + ld r9, 0x08(r4) + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + std r0, 0x40(r6) + ld r9, 0x48(r4) + ld r7, 0x50(r4) + ld r8, 0x58(r4) + ld r0, 0x60(r4) + std r9, 0x48(r6) + std r7, 0x50(r6) + std r8, 0x58(r6) + std r0, 0x60(r6) + ld r9, 0x68(r4) + ld r7, 0x70(r4) + ld r8, 0x78(r4) + ld r0, 0x80(r4) + addi r4, r4,0x80 + std r9, 0x68(r6) + std r7, 0x70(r6) + std r8, 0x78(r6) + stdu r0, 0x80(r6) + + bdnz L(loop2_128) +L(endloop2_128): + + b L(lessthancacheline) + + +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/addmul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc64/addmul_1.S new file mode 100644 index 0000000000..b4b052141d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/addmul_1.S @@ -0,0 +1,208 @@ +/* PowerPC64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add + the result to a second limb vector. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifdef USE_AS_SUBMUL +# define FUNC __mpn_submul_1 +# define ADDSUBC subfe +# define ADDSUB subfc +#else +# define FUNC __mpn_addmul_1 +# define ADDSUBC adde +# define ADDSUB addc +#endif + +#define RP r3 +#define UP r4 +#define N r5 +#define VL r6 + +EALIGN(FUNC, 5, 0) + std r31, -8(r1) + rldicl. r0, N, 0, 62 + std r30, -16(r1) + cmpdi VL, r0, 2 + std r29, -24(r1) + addi N, N, 3 + std r28, -32(r1) + srdi N, N, 2 + std r27, -40(r1) + mtctr N + beq cr0, L(b00) + blt cr6, L(b01) + beq cr6, L(b10) + +L(b11): ld r9, 0(UP) + ld r28, 0(RP) + mulld r0, r9, VL + mulhdu r12, r9, VL + ADDSUB r0, r0, r28 + std r0, 0(RP) + addi RP, RP, 8 + ld r9, 8(UP) + ld r27, 16(UP) + addi UP, UP, 24 +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 +#endif + b L(bot) + + .align 4 +L(b00): ld r9, 0(UP) + ld r27, 8(UP) + ld r28, 0(RP) + ld r29, 8(RP) + mulld r0, r9, VL + mulhdu N, r9, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + addc r7, r7, N + addze r12, r8 + ADDSUB r0, r0, r28 + std r0, 0(RP) + ADDSUBC r7, r7, r29 + std r7, 8(RP) + addi RP, RP, 16 + ld r9, 16(UP) + ld r27, 24(UP) + addi UP, UP, 32 +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 +#endif + b L(bot) + + .align 4 +L(b01): bdnz L(gt1) + ld r9, 0(UP) + ld r11, 0(RP) + mulld r0, r9, VL + mulhdu r8, r9, VL + ADDSUB r0, r0, r11 + std r0, 0(RP) +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 + addic r11, r11, 1 +#endif + addze RP, r8 + blr + +L(gt1): ld r9, 0(UP) + ld r27, 8(UP) + mulld r0, r9, VL + mulhdu N, r9, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + ld r9, 16(UP) + ld r28, 0(RP) + ld r29, 8(RP) + ld r30, 16(RP) + mulld r11, r9, VL + mulhdu r10, r9, VL + addc r7, r7, N + adde r11, r11, r8 + addze r12, r10 + ADDSUB r0, r0, r28 + std r0, 0(RP) + ADDSUBC r7, r7, r29 + std r7, 8(RP) + ADDSUBC r11, r11, r30 + std r11, 16(RP) + addi RP, RP, 24 + ld r9, 24(UP) + ld r27, 32(UP) + addi UP, UP, 40 +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 +#endif + b L(bot) + +L(b10): addic r0, r0, r0 + li r12, 0 + ld r9, 0(UP) + ld r27, 8(UP) + bdz L(end) + addi UP, UP, 16 + + .align 4 +L(top): mulld r0, r9, VL + mulhdu N, r9, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + ld r9, 0(UP) + ld r28, 0(RP) + ld r27, 8(UP) + ld r29, 8(RP) + adde r0, r0, r12 + adde r7, r7, N + mulld N, r9, VL + mulhdu r10, r9, VL + mulld r11, r27, VL + mulhdu r12, r27, VL + ld r9, 16(UP) + ld r30, 16(RP) + ld r27, 24(UP) + ld r31, 24(RP) + adde N, N, r8 + adde r11, r11, r10 + addze r12, r12 + ADDSUB r0, r0, r28 + std r0, 0(RP) + ADDSUBC r7, r7, r29 + std r7, 8(RP) + ADDSUBC N, N, r30 + std N, 16(RP) + ADDSUBC r11, r11, r31 + std r11, 24(RP) + addi UP, UP, 32 +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 +#endif + addi RP, RP, 32 +L(bot): +#ifdef USE_AS_SUBMUL + addic r11, r11, 1 +#endif + bdnz L(top) + +L(end): mulld r0, r9, VL + mulhdu N, r9, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + ld r28, 0(RP) + ld r29, 8(RP) + adde r0, r0, r12 + adde r7, r7, N + addze r8, r8 + ADDSUB r0, r0, r28 + std r0, 0(RP) + ADDSUBC r7, r7, r29 + std r7, 8(RP) +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 + addic r11, r11, 1 +#endif + addze RP, r8 + ld r31, -8(r1) + ld r30, -16(r1) + ld r29, -24(r1) + ld r28, -32(r1) + ld r27, -40(r1) + blr +END(FUNC) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/atomic-machine.h b/REORG.TODO/sysdeps/powerpc/powerpc64/atomic-machine.h new file mode 100644 index 0000000000..46df488b3c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/atomic-machine.h @@ -0,0 +1,242 @@ +/* Atomic operations. PowerPC64 version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* POWER6 adds a "Mutex Hint" to the Load and Reserve instruction. + This is a hint to the hardware to expect additional updates adjacent + to the lock word or not. If we are acquiring a Mutex, the hint + should be true. Otherwise we releasing a Mutex or doing a simple + atomic operation. In that case we don't expect additional updates + adjacent to the lock word after the Store Conditional and the hint + should be false. */ + +#if defined _ARCH_PWR6 || defined _ARCH_PWR6X +# define MUTEX_HINT_ACQ ",1" +# define MUTEX_HINT_REL ",0" +#else +# define MUTEX_HINT_ACQ +# define MUTEX_HINT_REL +#endif + +#define __HAVE_64B_ATOMICS 1 +#define USE_ATOMIC_COMPILER_BUILTINS 0 +#define ATOMIC_EXCHANGE_USES_CAS 1 + +/* The 32-bit exchange_bool is different on powerpc64 because the subf + does signed 64-bit arithmetic while the lwarx is 32-bit unsigned + (a load word and zero (high 32) form) load. + In powerpc64 register values are 64-bit by default, including oldval. + The value in old val unknown sign extension, lwarx loads the 32-bit + value as unsigned. So we explicitly clear the high 32 bits in oldval. */ +#define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \ +({ \ + unsigned int __tmp, __tmp2; \ + __asm __volatile (" clrldi %1,%1,32\n" \ + "1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \ + " subf. %0,%1,%0\n" \ + " bne 2f\n" \ + " stwcx. %4,0,%2\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp), "=r" (__tmp2) \ + : "b" (mem), "1" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp != 0; \ +}) + +/* + * Only powerpc64 processors support Load doubleword and reserve index (ldarx) + * and Store doubleword conditional indexed (stdcx) instructions. So here + * we define the 64-bit forms. + */ +#define __arch_compare_and_exchange_bool_64_acq(mem, newval, oldval) \ +({ \ + unsigned long __tmp; \ + __asm __volatile ( \ + "1: ldarx %0,0,%1" MUTEX_HINT_ACQ "\n" \ + " subf. %0,%2,%0\n" \ + " bne 2f\n" \ + " stdcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp) \ + : "b" (mem), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp != 0; \ +}) + +#define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __tmp; \ + __typeof (mem) __memp = (mem); \ + __asm __volatile ( \ + "1: ldarx %0,0,%1" MUTEX_HINT_ACQ "\n" \ + " cmpd %0,%2\n" \ + " bne 2f\n" \ + " stdcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp) \ + : "b" (__memp), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp; \ + }) + +#define __arch_compare_and_exchange_val_64_rel(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __tmp; \ + __typeof (mem) __memp = (mem); \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: ldarx %0,0,%1" MUTEX_HINT_REL "\n" \ + " cmpd %0,%2\n" \ + " bne 2f\n" \ + " stdcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " \ + : "=&r" (__tmp) \ + : "b" (__memp), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp; \ + }) + +#define __arch_atomic_exchange_64_acq(mem, value) \ + ({ \ + __typeof (*mem) __val; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: ldarx %0,0,%2" MUTEX_HINT_ACQ "\n" \ + " stdcx. %3,0,%2\n" \ + " bne- 1b\n" \ + " " __ARCH_ACQ_INSTR \ + : "=&r" (__val), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_64_rel(mem, value) \ + ({ \ + __typeof (*mem) __val; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: ldarx %0,0,%2" MUTEX_HINT_REL "\n" \ + " stdcx. %3,0,%2\n" \ + " bne- 1b" \ + : "=&r" (__val), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_64(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile ("1: ldarx %0,0,%3\n" \ + " add %1,%0,%4\n" \ + " stdcx. %1,0,%3\n" \ + " bne- 1b" \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_64_acq(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile ("1: ldarx %0,0,%3" MUTEX_HINT_ACQ "\n" \ + " add %1,%0,%4\n" \ + " stdcx. %1,0,%3\n" \ + " bne- 1b\n" \ + __ARCH_ACQ_INSTR \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_64_rel(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: ldarx %0,0,%3" MUTEX_HINT_REL "\n" \ + " add %1,%0,%4\n" \ + " stdcx. %1,0,%3\n" \ + " bne- 1b" \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_increment_val_64(mem) \ + ({ \ + __typeof (*(mem)) __val; \ + __asm __volatile ("1: ldarx %0,0,%2\n" \ + " addi %0,%0,1\n" \ + " stdcx. %0,0,%2\n" \ + " bne- 1b" \ + : "=&b" (__val), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_decrement_val_64(mem) \ + ({ \ + __typeof (*(mem)) __val; \ + __asm __volatile ("1: ldarx %0,0,%2\n" \ + " subi %0,%0,1\n" \ + " stdcx. %0,0,%2\n" \ + " bne- 1b" \ + : "=&b" (__val), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_decrement_if_positive_64(mem) \ + ({ int __val, __tmp; \ + __asm __volatile ("1: ldarx %0,0,%3\n" \ + " cmpdi 0,%0,0\n" \ + " addi %1,%0,-1\n" \ + " ble 2f\n" \ + " stdcx. %1,0,%3\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +/* + * All powerpc64 processors support the new "light weight" sync (lwsync). + */ +#define atomic_read_barrier() __asm ("lwsync" ::: "memory") +/* + * "light weight" sync can also be used for the release barrier. + */ +#ifndef UP +# define __ARCH_REL_INSTR "lwsync" +#endif +#define atomic_write_barrier() __asm ("lwsync" ::: "memory") + +/* + * Include the rest of the atomic ops macros which are common to both + * powerpc32 and powerpc64. + */ +#include_next <atomic-machine.h> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/backtrace.c b/REORG.TODO/sysdeps/powerpc/powerpc64/backtrace.c new file mode 100644 index 0000000000..723948d78f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/backtrace.c @@ -0,0 +1,104 @@ +/* Return backtrace of current program state. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> +#include <string.h> +#include <signal.h> +#include <stdint.h> + +#include <execinfo.h> +#include <libc-vdso.h> + +/* This is the stack layout we see with every stack frame. + Note that every routine is required by the ABI to lay out the stack + like this. + + +----------------+ +-----------------+ + %r1 -> | %r1 last frame--------> | %r1 last frame--->... --> NULL + | | | | + | cr save | | cr save | + | | | | + | (unused) | | return address | + +----------------+ +-----------------+ +*/ +struct layout +{ + struct layout *next; + long int condition_register; + void *return_address; +}; + +/* Since the signal handler is just like any other function it needs to + save/restore its LR and it will save it into callers stack frame. + Since a signal handler doesn't have a caller, the kernel creates a + dummy frame to make it look like it has a caller. */ +struct signal_frame_64 { +#define SIGNAL_FRAMESIZE 128 + char dummy[SIGNAL_FRAMESIZE]; + struct ucontext uc; + /* We don't care about the rest, since the IP value is at 'uc' field. */ +}; + +static inline int +is_sigtramp_address (void *nip) +{ +#ifdef SHARED + if (nip == VDSO_SYMBOL (sigtramp_rt64)) + return 1; +#endif + return 0; +} + +int +__backtrace (void **array, int size) +{ + struct layout *current; + int count; + + /* Force gcc to spill LR. */ + asm volatile ("" : "=l"(current)); + + /* Get the address on top-of-stack. */ + asm volatile ("ld %0,0(1)" : "=r"(current)); + + for ( count = 0; + current != NULL && count < size; + current = current->next, count++) + { + array[count] = current->return_address; + + /* Check if the symbol is the signal trampoline and get the interrupted + * symbol address from the trampoline saved area. */ + if (is_sigtramp_address (current->return_address)) + { + struct signal_frame_64 *sigframe = (struct signal_frame_64*) current; + array[++count] = (void*) sigframe->uc.uc_mcontext.gp_regs[PT_NIP]; + current = (void*) sigframe->uc.uc_mcontext.gp_regs[PT_R1]; + } + } + + /* It's possible the second-last stack frame can't return + (that is, it's __libc_start_main), in which case + the CRT startup code will have set its LR to 'NULL'. */ + if (count > 0 && array[count-1] == NULL) + count--; + + return count; +} +weak_alias (__backtrace, backtrace) +libc_hidden_def (__backtrace) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/bits/wordsize.h b/REORG.TODO/sysdeps/powerpc/powerpc64/bits/wordsize.h new file mode 100644 index 0000000000..04ca9debf0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/bits/wordsize.h @@ -0,0 +1,11 @@ +/* Determine the wordsize from the preprocessor defines. */ + +#if defined __powerpc64__ +# define __WORDSIZE 64 +# define __WORDSIZE_TIME64_COMPAT32 1 +#else +# define __WORDSIZE 32 +# define __WORDSIZE_TIME64_COMPAT32 0 +# define __WORDSIZE32_SIZE_ULONG 0 +# define __WORDSIZE32_PTRDIFF_LONG 0 +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-_setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-_setjmp.S new file mode 100644 index 0000000000..86d49b1c6e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-_setjmp.S @@ -0,0 +1 @@ +/* _setjmp moved to setjmp-common.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-setjmp.S new file mode 100644 index 0000000000..38b734fcb4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-setjmp.S @@ -0,0 +1 @@ +/* setjmp moved to setjmp-common.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/bzero.S b/REORG.TODO/sysdeps/powerpc/powerpc64/bzero.S new file mode 100644 index 0000000000..41cfac5127 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/bzero.S @@ -0,0 +1,20 @@ +/* Optimized bzero `implementation' for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This code was moved into memset.S to solve a double stub call problem. + @local would have worked but it is not supported in PowerPC64 asm. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/cell/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/cell/memcpy.S new file mode 100644 index 0000000000..1cc66456e3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/cell/memcpy.S @@ -0,0 +1,246 @@ +/* Optimized memcpy implementation for CELL BE PowerPC. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +#define PREFETCH_AHEAD 6 /* no cache lines SRC prefetching ahead */ +#define ZERO_AHEAD 4 /* no cache lines DST zeroing ahead */ + +/* memcpy routine optimized for CELL-BE-PPC v2.0 + * + * The CELL PPC core has 1 integer unit and 1 load/store unit + * CELL: + * 1st level data cache = 32K + * 2nd level data cache = 512K + * 3rd level data cache = 0K + * With 3.2 GHz clockrate the latency to 2nd level cache is >36 clocks, + * latency to memory is >400 clocks + * To improve copy performance we need to prefetch source data + * far ahead to hide this latency + * For best performance instruction forms ending in "." like "andi." + * should be avoided as the are implemented in microcode on CELL. + * The below code is loop unrolled for the CELL cache line of 128 bytes + */ + +.align 7 + +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + dcbt 0,r4 /* Prefetch ONE SRC cacheline */ + cmpldi cr1,r5,16 /* is size < 16 ? */ + mr r6,r3 + blt+ cr1,.Lshortcopy + +.Lbigcopy: + neg r8,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ + clrldi r8,r8,64-4 /* align to 16byte boundary */ + sub r7,r4,r3 + cmpldi cr0,r8,0 + beq+ .Ldst_aligned + +.Ldst_unaligned: + mtcrf 0x01,r8 /* put #bytes to boundary into cr7 */ + subf r5,r8,r5 + + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) + addi r6,r6,1 +1: bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: bf cr7*4+0,8f + ldx r0,r7,r6 /* copy 8 byte */ + std r0,0(r6) + addi r6,r6,8 +8: + add r4,r7,r6 + +.Ldst_aligned: + + cmpdi cr5,r5,128-1 + + neg r7,r6 + addi r6,r6,-8 /* prepare for stdu */ + addi r4,r4,-8 /* prepare for ldu */ + + clrldi r7,r7,64-7 /* align to cacheline boundary */ + ble+ cr5,.Llessthancacheline + + cmpldi cr6,r7,0 + subf r5,r7,r5 + srdi r7,r7,4 /* divide size by 16 */ + srdi r10,r5,7 /* number of cache lines to copy */ + + cmpldi r10,0 + li r11,0 /* number cachelines to copy with prefetch */ + beq .Lnocacheprefetch + + cmpldi r10,PREFETCH_AHEAD + li r12,128+8 /* prefetch distance */ + ble .Llessthanmaxprefetch + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +.Llessthanmaxprefetch: + mtctr r10 + +.LprefetchSRC: + dcbt r12,r4 + addi r12,r12,128 + bdnz .LprefetchSRC + +.Lnocacheprefetch: + mtctr r7 + cmpldi cr1,r5,128 + clrldi r5,r5,64-7 + beq cr6,.Lcachelinealigned + +.Laligntocacheline: + ld r9,0x08(r4) + ldu r7,0x10(r4) + std r9,0x08(r6) + stdu r7,0x10(r6) + bdnz .Laligntocacheline + + +.Lcachelinealigned: /* copy while cache lines */ + + blt- cr1,.Llessthancacheline /* size <128 */ + +.Louterloop: + cmpdi r11,0 + mtctr r11 + beq- .Lendloop + + li r11,128*ZERO_AHEAD +8 /* DCBZ dist */ + +.align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +.Lloop: /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + ld r9, 0x08(r4) + dcbz r11,r6 + ld r7, 0x10(r4) /* 4 register stride copy is optimal */ + ld r8, 0x18(r4) /* to hide 1st level cache latency. */ + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + std r0, 0x40(r6) + ld r9, 0x48(r4) + ld r7, 0x50(r4) + ld r8, 0x58(r4) + ld r0, 0x60(r4) + std r9, 0x48(r6) + std r7, 0x50(r6) + std r8, 0x58(r6) + std r0, 0x60(r6) + ld r9, 0x68(r4) + ld r7, 0x70(r4) + ld r8, 0x78(r4) + ldu r0, 0x80(r4) + std r9, 0x68(r6) + std r7, 0x70(r6) + std r8, 0x78(r6) + stdu r0, 0x80(r6) + + bdnz .Lloop + +.Lendloop: + cmpdi r10,0 + sldi r10,r10,2 /* adjust from 128 to 32 byte stride */ + beq- .Lendloop2 + mtctr r10 + +.Lloop2: /* Copy aligned body */ + ld r9, 0x08(r4) + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ldu r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + stdu r0, 0x20(r6) + + bdnz .Lloop2 +.Lendloop2: + +.Llessthancacheline: /* less than cache to do ? */ + cmpldi cr0,r5,16 + srdi r7,r5,4 /* divide size by 16 */ + blt- .Ldo_lt16 + mtctr r7 + +.Lcopy_remaining: + ld r8,0x08(r4) + ldu r7,0x10(r4) + std r8,0x08(r6) + stdu r7,0x10(r6) + bdnz .Lcopy_remaining + +.Ldo_lt16: /* less than 16 ? */ + cmpldi cr0,r5,0 /* copy remaining bytes (0-15) */ + beqlr+ /* no rest to copy */ + addi r4,r4,8 + addi r6,r6,8 + +.Lshortcopy: /* SIMPLE COPY to handle size =< 15 bytes */ + mtcrf 0x01,r5 + sub r7,r4,r6 + bf- cr7*4+0,8f + ldx r0,r7,r6 /* copy 8 byte */ + std r0,0(r6) + addi r6,r6,8 +8: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) +1: blr + +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/configure b/REORG.TODO/sysdeps/powerpc/powerpc64/configure new file mode 100644 index 0000000000..7632a7be04 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/configure @@ -0,0 +1,33 @@ +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/powerpc/powerpc64. + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for support for overlapping .opd entries" >&5 +$as_echo_n "checking for support for overlapping .opd entries... " >&6; } +if ${libc_cv_overlapping_opd+:} false; then : + $as_echo_n "(cached) " >&6 +else + libc_cv_overlapping_opd=no +echo 'void foo (void) {}' > conftest.c +if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S conftest.c -o conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + if grep '\.TOC\.@tocbase' conftest.s > /dev/null; then + if grep '\.TOC\.@tocbase[ ]*,[ ]*0' conftest.s > /dev/null; then + : + else + libc_cv_overlapping_opd=yes + fi + fi +fi +rm -f conftest.c conftest.s + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_overlapping_opd" >&5 +$as_echo "$libc_cv_overlapping_opd" >&6; } +if test x$libc_cv_overlapping_opd = xyes; then + $as_echo "#define USE_PPC64_OVERLAPPING_OPD 1" >>confdefs.h + +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/configure.ac b/REORG.TODO/sysdeps/powerpc/powerpc64/configure.ac new file mode 100644 index 0000000000..f309ba35a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/configure.ac @@ -0,0 +1,23 @@ +GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. +# Local configure fragment for sysdeps/powerpc/powerpc64. + +AC_CACHE_CHECK(for support for overlapping .opd entries, +libc_cv_overlapping_opd, [dnl +libc_cv_overlapping_opd=no +echo 'void foo (void) {}' > conftest.c +if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS -S conftest.c -o conftest.s 1>&AS_MESSAGE_LOG_FD); then +changequote(,)dnl + if grep '\.TOC\.@tocbase' conftest.s > /dev/null; then + if grep '\.TOC\.@tocbase[ ]*,[ ]*0' conftest.s > /dev/null; then + : + else + libc_cv_overlapping_opd=yes + fi + fi +changequote([,])dnl +fi +rm -f conftest.c conftest.s +]) +if test x$libc_cv_overlapping_opd = xyes; then + AC_DEFINE(USE_PPC64_OVERLAPPING_OPD) +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/crti.S b/REORG.TODO/sysdeps/powerpc/powerpc64/crti.S new file mode 100644 index 0000000000..fa4f5833ef --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/crti.S @@ -0,0 +1,88 @@ +/* Special .init and .fini section support for PowerPC64. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crti.S puts a function prologue at the beginning of the .init and + .fini sections and defines global symbols for those addresses, so + they can be called as functions. The symbols _init and _fini are + magic and cause the linker to emit DT_INIT and DT_FINI. */ + +#include <libc-symbols.h> +#include <sysdep.h> + +#ifndef PREINIT_FUNCTION +# define PREINIT_FUNCTION __gmon_start__ +#endif + +#ifndef PREINIT_FUNCTION_WEAK +# define PREINIT_FUNCTION_WEAK 1 +#endif + +#if PREINIT_FUNCTION_WEAK + weak_extern (PREINIT_FUNCTION) +#else + .hidden PREINIT_FUNCTION +#endif + +#if PREINIT_FUNCTION_WEAK + .section ".toc", "aw" +.LC0: + .tc PREINIT_FUNCTION[TC], PREINIT_FUNCTION +#endif + .section ".init", "ax", @progbits + ENTRY_2(_init) + .align ALIGNARG (2) +BODY_LABEL (_init): + LOCALENTRY(_init) + mflr 0 + std 0, FRAME_LR_SAVE(r1) + stdu r1, -FRAME_MIN_SIZE_PARM(r1) +#if PREINIT_FUNCTION_WEAK + addis r9, r2, .LC0@toc@ha + ld r0, .LC0@toc@l(r9) + cmpdi cr7, r0, 0 + beq+ cr7, 1f +#endif + bl JUMPTARGET (PREINIT_FUNCTION) + nop +1: + + .section ".fini", "ax", @progbits + ENTRY_2(_fini) + .align ALIGNARG (2) +BODY_LABEL (_fini): + LOCALENTRY(_fini) + mflr 0 + std 0, FRAME_LR_SAVE(r1) + stdu r1, -FRAME_MIN_SIZE_PARM(r1) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/crtn.S b/REORG.TODO/sysdeps/powerpc/powerpc64/crtn.S new file mode 100644 index 0000000000..a8906aa16c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/crtn.S @@ -0,0 +1,51 @@ +/* Special .init and .fini section support for PowerPC64. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crtn.S puts function epilogues in the .init and .fini sections + corresponding to the prologues in crti.S. */ + +#include <sysdep.h> + + .section .init,"ax",@progbits + addi r1, r1, FRAME_MIN_SIZE_PARM + ld r0, FRAME_LR_SAVE(r1) + mtlr r0 + blr + + .section .fini,"ax",@progbits + addi r1, r1, FRAME_MIN_SIZE_PARM + ld r0, FRAME_LR_SAVE(r1) + mtlr r0 + blr diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-dtprocnum.h b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-dtprocnum.h new file mode 100644 index 0000000000..142714b421 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-dtprocnum.h @@ -0,0 +1,21 @@ +/* Configuration of lookup functions. PowerPC64 version. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Number of extra dynamic section entries for this architecture. By + default there are none. */ +#define DT_THISPROCNUM DT_PPC64_NUM diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-irel.h b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-irel.h new file mode 100644 index 0000000000..d8f5988bc9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-irel.h @@ -0,0 +1,63 @@ +/* Machine-dependent ELF indirect relocation inline functions. + PowerPC64 version. + Copyright (C) 2009-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _DL_IREL_H +#define _DL_IREL_H + +#include <stdio.h> +#include <unistd.h> +#include <ldsodefs.h> +#include <dl-machine.h> + +#define ELF_MACHINE_IRELA 1 + +static inline Elf64_Addr +__attribute ((always_inline)) +elf_ifunc_invoke (Elf64_Addr addr) +{ + return ((Elf64_Addr (*) (unsigned long int)) (addr)) (GLRO(dl_hwcap)); +} + +static inline void +__attribute ((always_inline)) +elf_irela (const Elf64_Rela *reloc) +{ + unsigned int r_type = ELF64_R_TYPE (reloc->r_info); + + if (__glibc_likely (r_type == R_PPC64_IRELATIVE)) + { + Elf64_Addr *const reloc_addr = (void *) reloc->r_offset; + Elf64_Addr value = elf_ifunc_invoke(reloc->r_addend); + *reloc_addr = value; + } + else if (__glibc_likely (r_type == R_PPC64_JMP_IREL)) + { + Elf64_Addr *const reloc_addr = (void *) reloc->r_offset; + Elf64_Addr value = elf_ifunc_invoke(reloc->r_addend); +#if _CALL_ELF != 2 + *(Elf64_FuncDesc *) reloc_addr = *(Elf64_FuncDesc *) value; +#else + *reloc_addr = value; +#endif + } + else + __libc_fatal ("unexpected reloc type in static binary"); +} + +#endif /* dl-irel.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.c b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.c new file mode 100644 index 0000000000..0eccc6621a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.c @@ -0,0 +1,47 @@ +/* Machine-dependent ELF dynamic relocation functions. PowerPC64 version. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <unistd.h> +#include <ldsodefs.h> +#include <_itoa.h> +#include <dl-machine.h> + +void +_dl_reloc_overflow (struct link_map *map, + const char *name, + Elf64_Addr *const reloc_addr, + const Elf64_Sym *refsym) +{ + char buffer[128]; + char *t; + t = stpcpy (buffer, name); + t = stpcpy (t, " reloc at 0x"); + _itoa_word ((unsigned long) reloc_addr, t, 16, 0); + if (refsym) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + t = stpcpy (t, " for symbol `"); + t = stpcpy (t, strtab + refsym->st_name); + t = stpcpy (t, "'"); + } + t = stpcpy (t, " out of range"); + _dl_signal_error (0, map->l_name, NULL, buffer); +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.h b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.h new file mode 100644 index 0000000000..6391b3a558 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.h @@ -0,0 +1,1036 @@ +/* Machine-dependent ELF dynamic relocation inline functions. + PowerPC64 version. + Copyright 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#ifndef dl_machine_h +#define dl_machine_h + +#define ELF_MACHINE_NAME "powerpc64" + +#include <assert.h> +#include <sys/param.h> +#include <dl-tls.h> +#include <sysdep.h> +#include <hwcapinfo.h> + +/* Translate a processor specific dynamic tag to the index + in l_info array. */ +#define DT_PPC64(x) (DT_PPC64_##x - DT_LOPROC + DT_NUM) + +#if _CALL_ELF != 2 +/* A PowerPC64 function descriptor. The .plt (procedure linkage + table) and .opd (official procedure descriptor) sections are + arrays of these. */ +typedef struct +{ + Elf64_Addr fd_func; + Elf64_Addr fd_toc; + Elf64_Addr fd_aux; +} Elf64_FuncDesc; +#endif + +#define ELF_MULT_MACHINES_SUPPORTED + +/* Return nonzero iff ELF header is compatible with the running host. */ +static inline int +elf_machine_matches_host (const Elf64_Ehdr *ehdr) +{ + /* Verify that the binary matches our ABI version. */ + if ((ehdr->e_flags & EF_PPC64_ABI) != 0) + { +#if _CALL_ELF != 2 + if ((ehdr->e_flags & EF_PPC64_ABI) != 1) + return 0; +#else + if ((ehdr->e_flags & EF_PPC64_ABI) != 2) + return 0; +#endif + } + + return ehdr->e_machine == EM_PPC64; +} + +/* Return nonzero iff ELF header is compatible with the running host, + but not this loader. */ +static inline int +elf_host_tolerates_machine (const Elf64_Ehdr *ehdr) +{ + return ehdr->e_machine == EM_PPC; +} + +/* Return nonzero iff ELF header is compatible with the running host, + but not this loader. */ +static inline int +elf_host_tolerates_class (const Elf64_Ehdr *ehdr) +{ + return ehdr->e_ident[EI_CLASS] == ELFCLASS32; +} + + +/* Return the run-time load address of the shared object, assuming it + was originally linked at zero. */ +static inline Elf64_Addr +elf_machine_load_address (void) __attribute__ ((const)); + +static inline Elf64_Addr +elf_machine_load_address (void) +{ + Elf64_Addr ret; + + /* The first entry in .got (and thus the first entry in .toc) is the + link-time TOC_base, ie. r2. So the difference between that and + the current r2 set by the kernel is how far the shared lib has + moved. */ + asm ( " ld %0,-32768(2)\n" + " subf %0,%0,2\n" + : "=r" (ret)); + return ret; +} + +/* Return the link-time address of _DYNAMIC. */ +static inline Elf64_Addr +elf_machine_dynamic (void) +{ + Elf64_Addr runtime_dynamic; + /* It's easier to get the run-time address. */ + asm ( " addis %0,2,_DYNAMIC@toc@ha\n" + " addi %0,%0,_DYNAMIC@toc@l\n" + : "=b" (runtime_dynamic)); + /* Then subtract off the load address offset. */ + return runtime_dynamic - elf_machine_load_address() ; +} + +#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */ + +/* The PLT uses Elf64_Rela relocs. */ +#define elf_machine_relplt elf_machine_rela + + +#ifdef HAVE_INLINED_SYSCALLS +/* We do not need _dl_starting_up. */ +# define DL_STARTING_UP_DEF +#else +# define DL_STARTING_UP_DEF \ +".LC__dl_starting_up:\n" \ +" .tc __GI__dl_starting_up[TC],__GI__dl_starting_up\n" +#endif + + +/* Initial entry point code for the dynamic linker. The C function + `_dl_start' is the real entry point; its return value is the user + program's entry point. */ +#define RTLD_START \ + asm (".pushsection \".text\"\n" \ +" .align 2\n" \ +" " ENTRY_2(_start) "\n" \ +BODY_PREFIX "_start:\n" \ +" " LOCALENTRY(_start) "\n" \ +/* We start with the following on the stack, from top: \ + argc (4 bytes); \ + arguments for program (terminated by NULL); \ + environment variables (terminated by NULL); \ + arguments for the program loader. */ \ +" mr 3,1\n" \ +" li 4,0\n" \ +" stdu 4,-128(1)\n" \ +/* Call _dl_start with one parameter pointing at argc. */ \ +" bl " DOT_PREFIX "_dl_start\n" \ +" nop\n" \ +/* Transfer control to _dl_start_user! */ \ +" b " DOT_PREFIX "_dl_start_user\n" \ +".LT__start:\n" \ +" .long 0\n" \ +" .byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n" \ +" .long .LT__start-" BODY_PREFIX "_start\n" \ +" .short .LT__start_name_end-.LT__start_name_start\n" \ +".LT__start_name_start:\n" \ +" .ascii \"_start\"\n" \ +".LT__start_name_end:\n" \ +" .align 2\n" \ +" " END_2(_start) "\n" \ +" .pushsection \".toc\",\"aw\"\n" \ +DL_STARTING_UP_DEF \ +".LC__rtld_local:\n" \ +" .tc _rtld_local[TC],_rtld_local\n" \ +".LC__dl_argc:\n" \ +" .tc _dl_argc[TC],_dl_argc\n" \ +".LC__dl_argv:\n" \ +" .tc __GI__dl_argv[TC],__GI__dl_argv\n" \ +".LC__dl_fini:\n" \ +" .tc _dl_fini[TC],_dl_fini\n" \ +" .popsection\n" \ +" " ENTRY_2(_dl_start_user) "\n" \ +/* Now, we do our main work of calling initialisation procedures. \ + The ELF ABI doesn't say anything about parameters for these, \ + so we just pass argc, argv, and the environment. \ + Changing these is strongly discouraged (not least because argc is \ + passed by value!). */ \ +BODY_PREFIX "_dl_start_user:\n" \ +" " LOCALENTRY(_dl_start_user) "\n" \ +/* the address of _start in r30. */ \ +" mr 30,3\n" \ +/* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28. */ \ +" ld 28,.LC__rtld_local@toc(2)\n" \ +" ld 29,.LC__dl_argc@toc(2)\n" \ +" ld 27,.LC__dl_argv@toc(2)\n" \ +/* _dl_init (_dl_loaded, _dl_argc, _dl_argv, _dl_argv+_dl_argc+1). */ \ +" ld 3,0(28)\n" \ +" lwa 4,0(29)\n" \ +" ld 5,0(27)\n" \ +" sldi 6,4,3\n" \ +" add 6,5,6\n" \ +" addi 6,6,8\n" \ +" bl " DOT_PREFIX "_dl_init\n" \ +" nop\n" \ +/* Now, to conform to the ELF ABI, we have to: \ + Pass argc (actually _dl_argc) in r3; */ \ +" lwa 3,0(29)\n" \ +/* Pass argv (actually _dl_argv) in r4; */ \ +" ld 4,0(27)\n" \ +/* Pass argv+argc+1 in r5; */ \ +" sldi 5,3,3\n" \ +" add 6,4,5\n" \ +" addi 5,6,8\n" \ +/* Pass the auxiliary vector in r6. This is passed to us just after \ + _envp. */ \ +"2: ldu 0,8(6)\n" \ +" cmpdi 0,0\n" \ +" bne 2b\n" \ +" addi 6,6,8\n" \ +/* Pass a termination function pointer (in this case _dl_fini) in \ + r7. */ \ +" ld 7,.LC__dl_fini@toc(2)\n" \ +/* Pass the stack pointer in r1 (so far so good), pointing to a NULL \ + value. This lets our startup code distinguish between a program \ + linked statically, which linux will call with argc on top of the \ + stack which will hopefully never be zero, and a dynamically linked \ + program which will always have a NULL on the top of the stack. \ + Take the opportunity to clear LR, so anyone who accidentally \ + returns from _start gets SEGV. Also clear the next few words of \ + the stack. */ \ +" li 31,0\n" \ +" std 31,0(1)\n" \ +" mtlr 31\n" \ +" std 31,8(1)\n" \ +" std 31,16(1)\n" \ +" std 31,24(1)\n" \ +/* Now, call the start function descriptor at r30... */ \ +" .globl ._dl_main_dispatch\n" \ +"._dl_main_dispatch:\n" \ +" " PPC64_LOAD_FUNCPTR(30) "\n" \ +" bctr\n" \ +".LT__dl_start_user:\n" \ +" .long 0\n" \ +" .byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n" \ +" .long .LT__dl_start_user-" BODY_PREFIX "_dl_start_user\n" \ +" .short .LT__dl_start_user_name_end-.LT__dl_start_user_name_start\n" \ +".LT__dl_start_user_name_start:\n" \ +" .ascii \"_dl_start_user\"\n" \ +".LT__dl_start_user_name_end:\n" \ +" .align 2\n" \ +" " END_2(_dl_start_user) "\n" \ +" .popsection"); + +/* ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve to + one of the main executable's symbols, as for a COPY reloc. + + To make function pointer comparisons work on most targets, the + relevant ABI states that the address of a non-local function in a + dynamically linked executable is the address of the PLT entry for + that function. This is quite reasonable since using the real + function address in a non-PIC executable would typically require + dynamic relocations in .text, something to be avoided. For such + functions, the linker emits a SHN_UNDEF symbol in the executable + with value equal to the PLT entry address. Normally, SHN_UNDEF + symbols have a value of zero, so this is a clue to ld.so that it + should treat these symbols specially. For relocations not in + ELF_RTYPE_CLASS_PLT (eg. those on function pointers), ld.so should + use the value of the executable SHN_UNDEF symbol, ie. the PLT entry + address. For relocations in ELF_RTYPE_CLASS_PLT (eg. the relocs in + the PLT itself), ld.so should use the value of the corresponding + defined symbol in the object that defines the function, ie. the + real function address. This complicates ld.so in that there are + now two possible values for a given symbol, and it gets even worse + because protected symbols need yet another set of rules. + + On PowerPC64 we don't need any of this. The linker won't emit + SHN_UNDEF symbols with non-zero values. ld.so can make all + relocations behave "normally", ie. always use the real address + like PLT relocations. So always set ELF_RTYPE_CLASS_PLT. */ + +#if _CALL_ELF != 2 +#define elf_machine_type_class(type) \ + (ELF_RTYPE_CLASS_PLT | (((type) == R_PPC64_COPY) * ELF_RTYPE_CLASS_COPY)) +#else +/* And now that you have read that large comment, you can disregard it + all for ELFv2. ELFv2 does need the special SHN_UNDEF treatment. */ +#define IS_PPC64_TLS_RELOC(R) \ + (((R) >= R_PPC64_TLS && (R) <= R_PPC64_DTPREL16_HIGHESTA) \ + || ((R) >= R_PPC64_TPREL16_HIGH && (R) <= R_PPC64_DTPREL16_HIGHA)) + +#define elf_machine_type_class(type) \ + ((((type) == R_PPC64_JMP_SLOT \ + || (type) == R_PPC64_ADDR24 \ + || IS_PPC64_TLS_RELOC (type)) * ELF_RTYPE_CLASS_PLT) \ + | (((type) == R_PPC64_COPY) * ELF_RTYPE_CLASS_COPY)) +#endif + +/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ +#define ELF_MACHINE_JMP_SLOT R_PPC64_JMP_SLOT + +/* The PowerPC never uses REL relocations. */ +#define ELF_MACHINE_NO_REL 1 +#define ELF_MACHINE_NO_RELA 0 + +/* We define an initialization function to initialize HWCAP/HWCAP2 and + platform data so it can be copied into the TCB later. This is called + very early in _dl_sysdep_start for dynamically linked binaries. */ +#ifdef SHARED +# define DL_PLATFORM_INIT dl_platform_init () + +static inline void __attribute__ ((unused)) +dl_platform_init (void) +{ + __tcb_parse_hwcap_and_convert_at_platform (); +} +#endif + +/* Stuff for the PLT. */ +#if _CALL_ELF != 2 +#define PLT_INITIAL_ENTRY_WORDS 3 +#define PLT_ENTRY_WORDS 3 +#define GLINK_INITIAL_ENTRY_WORDS 8 +/* The first 32k entries of glink can set an index and branch using two + instructions; past that point, glink uses three instructions. */ +#define GLINK_ENTRY_WORDS(I) (((I) < 0x8000)? 2 : 3) +#else +#define PLT_INITIAL_ENTRY_WORDS 2 +#define PLT_ENTRY_WORDS 1 +#define GLINK_INITIAL_ENTRY_WORDS 8 +#define GLINK_ENTRY_WORDS(I) 1 +#endif + +#define PPC_DCBST(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory") +#define PPC_DCBT(where) asm volatile ("dcbt 0,%0" : : "r"(where) : "memory") +#define PPC_DCBF(where) asm volatile ("dcbf 0,%0" : : "r"(where) : "memory") +#define PPC_SYNC asm volatile ("sync" : : : "memory") +#define PPC_ISYNC asm volatile ("sync; isync" : : : "memory") +#define PPC_ICBI(where) asm volatile ("icbi 0,%0" : : "r"(where) : "memory") +#define PPC_DIE asm volatile ("tweq 0,0") +/* Use this when you've modified some code, but it won't be in the + instruction fetch queue (or when it doesn't matter if it is). */ +#define MODIFIED_CODE_NOQUEUE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); } while (0) +/* Use this when it might be in the instruction queue. */ +#define MODIFIED_CODE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); PPC_ISYNC; } while (0) + +/* Set up the loaded object described by MAP so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. */ +static inline int __attribute__ ((always_inline)) +elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) +{ + if (map->l_info[DT_JMPREL]) + { + Elf64_Word i; + Elf64_Word *glink = NULL; + Elf64_Xword *plt = (Elf64_Xword *) D_PTR (map, l_info[DT_PLTGOT]); + Elf64_Word num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof (Elf64_Rela)); + Elf64_Addr l_addr = map->l_addr; + Elf64_Dyn **info = map->l_info; + char *p; + + extern void _dl_runtime_resolve (void); + extern void _dl_profile_resolve (void); + + /* Relocate the DT_PPC64_GLINK entry in the _DYNAMIC section. + elf_get_dynamic_info takes care of the standard entries but + doesn't know exactly what to do with processor specific + entries. */ + if (info[DT_PPC64(GLINK)] != NULL) + info[DT_PPC64(GLINK)]->d_un.d_ptr += l_addr; + + if (lazy) + { + Elf64_Word glink_offset; + Elf64_Word offset; + Elf64_Addr dlrr; + + dlrr = (Elf64_Addr) (profile ? _dl_profile_resolve + : _dl_runtime_resolve); + if (profile && GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), map)) + /* This is the object we are looking for. Say that we really + want profiling and the timers are started. */ + GL(dl_profile_map) = map; + +#if _CALL_ELF != 2 + /* We need to stuff the address/TOC of _dl_runtime_resolve + into doublewords 0 and 1 of plt_reserve. Then we need to + stuff the map address into doubleword 2 of plt_reserve. + This allows the GLINK0 code to transfer control to the + correct trampoline which will transfer control to fixup + in dl-machine.c. */ + { + /* The plt_reserve area is the 1st 3 doublewords of the PLT. */ + Elf64_FuncDesc *plt_reserve = (Elf64_FuncDesc *) plt; + Elf64_FuncDesc *resolve_fd = (Elf64_FuncDesc *) dlrr; + plt_reserve->fd_func = resolve_fd->fd_func; + plt_reserve->fd_toc = resolve_fd->fd_toc; + plt_reserve->fd_aux = (Elf64_Addr) map; +#ifdef RTLD_BOOTSTRAP + /* When we're bootstrapping, the opd entry will not have + been relocated yet. */ + plt_reserve->fd_func += l_addr; + plt_reserve->fd_toc += l_addr; +#endif + } +#else + /* When we don't have function descriptors, the first doubleword + of the PLT holds the address of _dl_runtime_resolve, and the + second doubleword holds the map address. */ + plt[0] = dlrr; + plt[1] = (Elf64_Addr) map; +#endif + + /* Set up the lazy PLT entries. */ + glink = (Elf64_Word *) D_PTR (map, l_info[DT_PPC64(GLINK)]); + offset = PLT_INITIAL_ENTRY_WORDS; + glink_offset = GLINK_INITIAL_ENTRY_WORDS; + for (i = 0; i < num_plt_entries; i++) + { + + plt[offset] = (Elf64_Xword) &glink[glink_offset]; + offset += PLT_ENTRY_WORDS; + glink_offset += GLINK_ENTRY_WORDS (i); + } + + /* Now, we've modified data. We need to write the changes from + the data cache to a second-level unified cache, then make + sure that stale data in the instruction cache is removed. + (In a multiprocessor system, the effect is more complex.) + Most of the PLT shouldn't be in the instruction cache, but + there may be a little overlap at the start and the end. + + Assumes that dcbst and icbi apply to lines of 16 bytes or + more. Current known line sizes are 16, 32, and 128 bytes. */ + + for (p = (char *) plt; p < (char *) &plt[offset]; p += 16) + PPC_DCBST (p); + PPC_SYNC; + } + } + return lazy; +} + +#if _CALL_ELF == 2 +/* If the PLT entry whose reloc is 'reloc' resolves to a function in + the same object, return the target function's local entry point + offset if usable. */ +static inline Elf64_Addr __attribute__ ((always_inline)) +ppc64_local_entry_offset (struct link_map *map, lookup_t sym_map, + const Elf64_Rela *reloc) +{ + const Elf64_Sym *symtab; + const Elf64_Sym *sym; + + /* If the target function is in a different object, we cannot + use the local entry point. */ + if (sym_map != map) + return 0; + + /* If the linker inserted multiple TOCs, we cannot use the + local entry point. */ + if (map->l_info[DT_PPC64(OPT)] + && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_MULTI_TOC)) + return 0; + + /* Otherwise, we can use the local entry point. Retrieve its offset + from the symbol's ELF st_other field. */ + symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]); + sym = &symtab[ELFW(R_SYM) (reloc->r_info)]; + + /* If the target function is an ifunc then the local entry offset is + for the resolver, not the final destination. */ + if (__builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0)) + return 0; + + return PPC64_LOCAL_ENTRY_OFFSET (sym->st_other); +} +#endif + +/* Change the PLT entry whose reloc is 'reloc' to call the actual + routine. */ +static inline Elf64_Addr __attribute__ ((always_inline)) +elf_machine_fixup_plt (struct link_map *map, lookup_t sym_map, + const Elf64_Rela *reloc, + Elf64_Addr *reloc_addr, Elf64_Addr finaladdr) +{ +#if _CALL_ELF != 2 + Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr; + Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr; + Elf64_Addr offset = 0; + Elf64_FuncDesc zero_fd = {0, 0, 0}; + + PPC_DCBT (&plt->fd_aux); + PPC_DCBT (&plt->fd_func); + + /* If sym_map is NULL, it's a weak undefined sym; Set the plt to + zero. finaladdr should be zero already in this case, but guard + against invalid plt relocations with non-zero addends. */ + if (sym_map == NULL) + finaladdr = 0; + + /* Don't die here if finaladdr is zero, die if this plt entry is + actually called. Makes a difference when LD_BIND_NOW=1. + finaladdr may be zero for a weak undefined symbol, or when an + ifunc resolver returns zero. */ + if (finaladdr == 0) + rel = &zero_fd; + else + { + PPC_DCBT (&rel->fd_aux); + PPC_DCBT (&rel->fd_func); + } + + /* If the opd entry is not yet relocated (because it's from a shared + object that hasn't been processed yet), then manually reloc it. */ + if (finaladdr != 0 && map != sym_map && !sym_map->l_relocated +#if !defined RTLD_BOOTSTRAP && defined SHARED + /* Bootstrap map doesn't have l_relocated set for it. */ + && sym_map != &GL(dl_rtld_map) +#endif + ) + offset = sym_map->l_addr; + + /* For PPC64, fixup_plt copies the function descriptor from opd + over the corresponding PLT entry. + Initially, PLT Entry[i] is set up for lazy linking, or is zero. + For lazy linking, the fd_toc and fd_aux entries are irrelevant, + so for thread safety we write them before changing fd_func. */ + + plt->fd_aux = rel->fd_aux + offset; + plt->fd_toc = rel->fd_toc + offset; + PPC_DCBF (&plt->fd_toc); + PPC_ISYNC; + + plt->fd_func = rel->fd_func + offset; + PPC_DCBST (&plt->fd_func); + PPC_ISYNC; +#else + finaladdr += ppc64_local_entry_offset (map, sym_map, reloc); + *reloc_addr = finaladdr; +#endif + + return finaladdr; +} + +static inline void __attribute__ ((always_inline)) +elf_machine_plt_conflict (struct link_map *map, lookup_t sym_map, + const Elf64_Rela *reloc, + Elf64_Addr *reloc_addr, Elf64_Addr finaladdr) +{ +#if _CALL_ELF != 2 + Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr; + Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr; + Elf64_FuncDesc zero_fd = {0, 0, 0}; + + if (sym_map == NULL) + finaladdr = 0; + + if (finaladdr == 0) + rel = &zero_fd; + + plt->fd_func = rel->fd_func; + plt->fd_aux = rel->fd_aux; + plt->fd_toc = rel->fd_toc; + PPC_DCBST (&plt->fd_func); + PPC_DCBST (&plt->fd_aux); + PPC_DCBST (&plt->fd_toc); + PPC_SYNC; +#else + finaladdr += ppc64_local_entry_offset (map, sym_map, reloc); + *reloc_addr = finaladdr; +#endif +} + +/* Return the final value of a plt relocation. */ +static inline Elf64_Addr +elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + Elf64_Addr value) +{ + return value + reloc->r_addend; +} + + +/* Names of the architecture-specific auditing callback functions. */ +#if _CALL_ELF != 2 +#define ARCH_LA_PLTENTER ppc64_gnu_pltenter +#define ARCH_LA_PLTEXIT ppc64_gnu_pltexit +#else +#define ARCH_LA_PLTENTER ppc64v2_gnu_pltenter +#define ARCH_LA_PLTEXIT ppc64v2_gnu_pltexit +#endif + +#endif /* dl_machine_h */ + +#ifdef RESOLVE_MAP + +#define PPC_LO(v) ((v) & 0xffff) +#define PPC_HI(v) (((v) >> 16) & 0xffff) +#define PPC_HA(v) PPC_HI ((v) + 0x8000) +#define PPC_HIGHER(v) (((v) >> 32) & 0xffff) +#define PPC_HIGHERA(v) PPC_HIGHER ((v) + 0x8000) +#define PPC_HIGHEST(v) (((v) >> 48) & 0xffff) +#define PPC_HIGHESTA(v) PPC_HIGHEST ((v) + 0x8000) +#define BIT_INSERT(var, val, mask) \ + ((var) = ((var) & ~(Elf64_Addr) (mask)) | ((val) & (mask))) + +#define dont_expect(X) __builtin_expect ((X), 0) + +extern void _dl_reloc_overflow (struct link_map *map, + const char *name, + Elf64_Addr *const reloc_addr, + const Elf64_Sym *refsym) + attribute_hidden; + +auto inline void __attribute__ ((always_inline)) +elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +{ + Elf64_Addr *const reloc_addr = reloc_addr_arg; + *reloc_addr = l_addr + reloc->r_addend; +} + +/* This computes the value used by TPREL* relocs. */ +auto inline Elf64_Addr __attribute__ ((always_inline, const)) +elf_machine_tprel (struct link_map *map, + struct link_map *sym_map, + const Elf64_Sym *sym, + const Elf64_Rela *reloc) +{ +#ifndef RTLD_BOOTSTRAP + if (sym_map) + { + CHECK_STATIC_TLS (map, sym_map); +#endif + return TLS_TPREL_VALUE (sym_map, sym, reloc); +#ifndef RTLD_BOOTSTRAP + } +#endif + return 0; +} + +/* Call function at address VALUE (an OPD entry) to resolve ifunc relocs. */ +auto inline Elf64_Addr __attribute__ ((always_inline)) +resolve_ifunc (Elf64_Addr value, + const struct link_map *map, const struct link_map *sym_map) +{ +#if _CALL_ELF != 2 +#ifndef RESOLVE_CONFLICT_FIND_MAP + /* The function we are calling may not yet have its opd entry relocated. */ + Elf64_FuncDesc opd; + if (map != sym_map +# if !defined RTLD_BOOTSTRAP && defined SHARED + /* Bootstrap map doesn't have l_relocated set for it. */ + && sym_map != &GL(dl_rtld_map) +# endif + && !sym_map->l_relocated) + { + Elf64_FuncDesc *func = (Elf64_FuncDesc *) value; + opd.fd_func = func->fd_func + sym_map->l_addr; + opd.fd_toc = func->fd_toc + sym_map->l_addr; + opd.fd_aux = func->fd_aux; + /* GCC 4.9+ eliminates the branch as dead code, force the odp set + dependency. */ + asm ("" : "=r" (value) : "0" (&opd), "X" (opd)); + } +#endif +#endif + return ((Elf64_Addr (*) (unsigned long int)) value) (GLRO(dl_hwcap)); +} + +/* Perform the relocation specified by RELOC and SYM (which is fully + resolved). MAP is the object containing the reloc. */ +auto inline void __attribute__ ((always_inline)) +elf_machine_rela (struct link_map *map, + const Elf64_Rela *reloc, + const Elf64_Sym *sym, + const struct r_found_version *version, + void *const reloc_addr_arg, + int skip_ifunc) +{ + Elf64_Addr *const reloc_addr = reloc_addr_arg; + const int r_type = ELF64_R_TYPE (reloc->r_info); + const Elf64_Sym *const refsym = sym; + union unaligned + { + uint16_t u2; + uint32_t u4; + uint64_t u8; + } __attribute__ ((__packed__)); + + if (r_type == R_PPC64_RELATIVE) + { + *reloc_addr = map->l_addr + reloc->r_addend; + return; + } + + if (__glibc_unlikely (r_type == R_PPC64_NONE)) + return; + + /* We need SYM_MAP even in the absence of TLS, for elf_machine_fixup_plt + and STT_GNU_IFUNC. */ + struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); + Elf64_Addr value = ((sym_map == NULL ? 0 : sym_map->l_addr + sym->st_value) + + reloc->r_addend); + + if (sym != NULL + && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0) + && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1) + && __builtin_expect (!skip_ifunc, 1)) + value = resolve_ifunc (value, map, sym_map); + + /* For relocs that don't edit code, return. + For relocs that might edit instructions, break from the switch. */ + switch (r_type) + { + case R_PPC64_ADDR64: + case R_PPC64_GLOB_DAT: + *reloc_addr = value; + return; + + case R_PPC64_IRELATIVE: + if (__glibc_likely (!skip_ifunc)) + value = resolve_ifunc (value, map, sym_map); + *reloc_addr = value; + return; + + case R_PPC64_JMP_IREL: + if (__glibc_likely (!skip_ifunc)) + value = resolve_ifunc (value, map, sym_map); + /* Fall thru */ + case R_PPC64_JMP_SLOT: +#ifdef RESOLVE_CONFLICT_FIND_MAP + elf_machine_plt_conflict (map, sym_map, reloc, reloc_addr, value); +#else + elf_machine_fixup_plt (map, sym_map, reloc, reloc_addr, value); +#endif + return; + + case R_PPC64_DTPMOD64: + if (map->l_info[DT_PPC64(OPT)] + && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_TLS)) + { +#ifdef RTLD_BOOTSTRAP + reloc_addr[0] = 0; + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + return; +#else + if (sym_map != NULL) + { +# ifndef SHARED + CHECK_STATIC_TLS (map, sym_map); +# else + if (TRY_STATIC_TLS (map, sym_map)) +# endif + { + reloc_addr[0] = 0; + /* Set up for local dynamic. */ + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + return; + } + } +#endif + } +#ifdef RTLD_BOOTSTRAP + /* During startup the dynamic linker is always index 1. */ + *reloc_addr = 1; +#else + /* Get the information from the link map returned by the + resolve function. */ + if (sym_map != NULL) + *reloc_addr = sym_map->l_tls_modid; +#endif + return; + + case R_PPC64_DTPREL64: + if (map->l_info[DT_PPC64(OPT)] + && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_TLS)) + { +#ifdef RTLD_BOOTSTRAP + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + return; +#else + if (sym_map != NULL) + { + /* This reloc is always preceded by R_PPC64_DTPMOD64. */ +# ifndef SHARED + assert (HAVE_STATIC_TLS (map, sym_map)); +# else + if (HAVE_STATIC_TLS (map, sym_map)) +# endif + { + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + return; + } + } +#endif + } + /* During relocation all TLS symbols are defined and used. + Therefore the offset is already correct. */ +#ifndef RTLD_BOOTSTRAP + if (sym_map != NULL) + *reloc_addr = TLS_DTPREL_VALUE (sym, reloc); +#endif + return; + + case R_PPC64_TPREL64: + *reloc_addr = elf_machine_tprel (map, sym_map, sym, reloc); + return; + + case R_PPC64_TPREL16_LO_DS: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect ((value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16_LO_DS", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Half *) reloc_addr, value, 0xfffc); + break; + + case R_PPC64_TPREL16_DS: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16_DS", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Half *) reloc_addr, value, 0xfffc); + break; + + case R_PPC64_TPREL16: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect ((value + 0x8000) >= 0x10000)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16", reloc_addr, refsym); + *(Elf64_Half *) reloc_addr = PPC_LO (value); + break; + + case R_PPC64_TPREL16_LO: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_LO (value); + break; + + case R_PPC64_TPREL16_HI: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect (value + 0x80000000 >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16_HI", reloc_addr, refsym); + *(Elf64_Half *) reloc_addr = PPC_HI (value); + break; + + case R_PPC64_TPREL16_HIGH: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HI (value); + break; + + case R_PPC64_TPREL16_HA: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect (value + 0x80008000 >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16_HA", reloc_addr, refsym); + *(Elf64_Half *) reloc_addr = PPC_HA (value); + break; + + case R_PPC64_TPREL16_HIGHA: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HA (value); + break; + + case R_PPC64_TPREL16_HIGHER: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HIGHER (value); + break; + + case R_PPC64_TPREL16_HIGHEST: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HIGHEST (value); + break; + + case R_PPC64_TPREL16_HIGHERA: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HIGHERA (value); + break; + + case R_PPC64_TPREL16_HIGHESTA: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HIGHESTA (value); + break; + +#ifndef RTLD_BOOTSTRAP /* None of the following appear in ld.so */ + case R_PPC64_ADDR16_LO_DS: + if (dont_expect ((value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_LO_DS", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Half *) reloc_addr, value, 0xfffc); + break; + + case R_PPC64_ADDR16_LO: + *(Elf64_Half *) reloc_addr = PPC_LO (value); + break; + + case R_PPC64_ADDR16_HI: + if (dont_expect (value + 0x80000000 >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_HI", reloc_addr, refsym); + case R_PPC64_ADDR16_HIGH: + *(Elf64_Half *) reloc_addr = PPC_HI (value); + break; + + case R_PPC64_ADDR16_HA: + if (dont_expect (value + 0x80008000 >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_HA", reloc_addr, refsym); + case R_PPC64_ADDR16_HIGHA: + *(Elf64_Half *) reloc_addr = PPC_HA (value); + break; + + case R_PPC64_ADDR30: + { + Elf64_Addr delta = value - (Elf64_Xword) reloc_addr; + if (dont_expect ((delta + 0x80000000) >= 0x100000000LL + || (delta & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR30", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Word *) reloc_addr, delta, 0xfffffffc); + } + break; + + case R_PPC64_COPY: + if (dont_expect (sym == NULL)) + /* This can happen in trace mode when an object could not be found. */ + return; + if (dont_expect (sym->st_size > refsym->st_size + || (GLRO(dl_verbose) + && sym->st_size < refsym->st_size))) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + _dl_error_printf ("%s: Symbol `%s' has different size" \ + " in shared object," \ + " consider re-linking\n", + RTLD_PROGNAME, strtab + refsym->st_name); + } + memcpy (reloc_addr_arg, (char *) value, + MIN (sym->st_size, refsym->st_size)); + return; + + case R_PPC64_UADDR64: + ((union unaligned *) reloc_addr)->u8 = value; + return; + + case R_PPC64_UADDR32: + ((union unaligned *) reloc_addr)->u4 = value; + return; + + case R_PPC64_ADDR32: + if (dont_expect ((value + 0x80000000) >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_ADDR32", reloc_addr, refsym); + *(Elf64_Word *) reloc_addr = value; + return; + + case R_PPC64_ADDR24: + if (dont_expect ((value + 0x2000000) >= 0x4000000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR24", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Word *) reloc_addr, value, 0x3fffffc); + break; + + case R_PPC64_ADDR16: + if (dont_expect ((value + 0x8000) >= 0x10000)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16", reloc_addr, refsym); + *(Elf64_Half *) reloc_addr = value; + break; + + case R_PPC64_UADDR16: + if (dont_expect ((value + 0x8000) >= 0x10000)) + _dl_reloc_overflow (map, "R_PPC64_UADDR16", reloc_addr, refsym); + ((union unaligned *) reloc_addr)->u2 = value; + return; + + case R_PPC64_ADDR16_DS: + if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_DS", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Half *) reloc_addr, value, 0xfffc); + break; + + case R_PPC64_ADDR16_HIGHER: + *(Elf64_Half *) reloc_addr = PPC_HIGHER (value); + break; + + case R_PPC64_ADDR16_HIGHEST: + *(Elf64_Half *) reloc_addr = PPC_HIGHEST (value); + break; + + case R_PPC64_ADDR16_HIGHERA: + *(Elf64_Half *) reloc_addr = PPC_HIGHERA (value); + break; + + case R_PPC64_ADDR16_HIGHESTA: + *(Elf64_Half *) reloc_addr = PPC_HIGHESTA (value); + break; + + case R_PPC64_ADDR14: + case R_PPC64_ADDR14_BRTAKEN: + case R_PPC64_ADDR14_BRNTAKEN: + { + if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR14", reloc_addr, refsym); + Elf64_Word insn = *(Elf64_Word *) reloc_addr; + BIT_INSERT (insn, value, 0xfffc); + if (r_type != R_PPC64_ADDR14) + { + insn &= ~(1 << 21); + if (r_type == R_PPC64_ADDR14_BRTAKEN) + insn |= 1 << 21; + if ((insn & (0x14 << 21)) == (0x04 << 21)) + insn |= 0x02 << 21; + else if ((insn & (0x14 << 21)) == (0x10 << 21)) + insn |= 0x08 << 21; + } + *(Elf64_Word *) reloc_addr = insn; + } + break; + + case R_PPC64_REL32: + *(Elf64_Word *) reloc_addr = value - (Elf64_Addr) reloc_addr; + return; + + case R_PPC64_REL64: + *reloc_addr = value - (Elf64_Addr) reloc_addr; + return; +#endif /* !RTLD_BOOTSTRAP */ + + default: + _dl_reloc_bad_type (map, r_type, 0); + return; + } + MODIFIED_CODE_NOQUEUE (reloc_addr); +} + +auto inline void __attribute__ ((always_inline)) +elf_machine_lazy_rel (struct link_map *map, + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) +{ + /* elf_machine_runtime_setup handles this. */ +} + + +#endif /* RESOLVE */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-trampoline.S b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-trampoline.S new file mode 100644 index 0000000000..5ec729d1f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-trampoline.S @@ -0,0 +1,500 @@ +/* PLT trampolines. PPC64 version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <rtld-global-offsets.h> + + + .section ".text" +/* On entry r0 contains the index of the PLT entry we need to fixup + and r11 contains the link_map (from PLT0+16). The link_map becomes + parm1 (r3) and the index (r0) need to be converted to an offset + (index * 24) in parm2 (r4). */ + +#define FRAME_SIZE (FRAME_MIN_SIZE+64) +/* We need to save the registers used to pass parameters, ie. r3 thru + r10; Use local var space rather than the parameter save area, + because gcc as of 2010/05 doesn't allocate a proper stack frame for + a function that makes no calls except for __tls_get_addr and we + might be here resolving the __tls_get_addr call. */ +#define INT_PARMS FRAME_MIN_SIZE +EALIGN(_dl_runtime_resolve, 4, 0) + stdu r1,-FRAME_SIZE(r1) + cfi_adjust_cfa_offset (FRAME_SIZE) + std r3,INT_PARMS+0(r1) + mr r3,r11 + std r4,INT_PARMS+8(r1) + sldi r4,r0,1 + std r5,INT_PARMS+16(r1) + add r4,r4,r0 + std r6,INT_PARMS+24(r1) + sldi r4,r4,3 + std r7,INT_PARMS+32(r1) + mflr r0 + std r8,INT_PARMS+40(r1) +/* Store the LR in the LR Save area. */ + std r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + cfi_offset (lr, FRAME_LR_SAVE) + std r9,INT_PARMS+48(r1) + std r10,INT_PARMS+56(r1) + bl JUMPTARGET(_dl_fixup) +#ifndef SHARED + nop +#endif +/* Put the registers back. */ + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +/* Prepare for calling the function returned by fixup. */ + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +#if _CALL_ELF == 2 +/* Restore the caller's TOC in case we jump to a local entry point. */ + ld r2,FRAME_SIZE+FRAME_TOC_SAVE(r1) +#endif +/* Unwind the stack frame, and jump. */ + addi r1,r1,FRAME_SIZE + bctr +END(_dl_runtime_resolve) +#undef FRAME_SIZE +#undef INT_PARMS + + /* Stack layout: ELFv2 ABI. + +752 previous backchain + +744 spill_r31 + +736 spill_r30 + +720 v8 + +704 v7 + +688 v6 + +672 v5 + +656 v4 + +640 v3 + +624 v2 + +608 v1 + +600 fp10 + ELFv1 ABI +592 fp9 + +592 previous backchain +584 fp8 + +584 spill_r31 +576 fp7 + +576 spill_r30 +568 fp6 + +560 v1 +560 fp5 + +552 fp4 +552 fp4 + +544 fp3 +544 fp3 + +536 fp2 +536 fp2 + +528 fp1 +528 fp1 + +520 r4 +520 r4 + +512 r3 +512 r3 + return values + +504 free + +496 stackframe + +488 lr + +480 r1 + +464 v13 + +448 v12 + +432 v11 + +416 v10 + +400 v9 + +384 v8 + +368 v7 + +352 v6 + +336 v5 + +320 v4 + +304 v3 + +288 v2 + * VMX Parms in V2-V13, V0-V1 are scratch + +284 vrsave + +280 free + +272 fp13 + +264 fp12 + +256 fp11 + +248 fp10 + +240 fp9 + +232 fp8 + +224 fp7 + +216 fp6 + +208 fp5 + +200 fp4 + +192 fp3 + +184 fp2 + +176 fp1 + * FP Parms in FP1-FP13, FP0 is a scratch register + +168 r10 + +160 r9 + +152 r8 + +144 r7 + +136 r6 + +128 r5 + +120 r4 + +112 r3 + * Integer parms in R3-R10, R0 is scratch, R1 SP, R2 is TOC + +104 parm8 + +96 parm7 + +88 parm6 + +80 parm5 + +72 parm4 + +64 parm3 + +56 parm2 + +48 parm1 + * Parameter save area + * (v1 ABI: Allocated by the call, at least 8 double words) + +40 v1 ABI: TOC save area + +32 v1 ABI: Reserved for linker + +24 v1 ABI: Reserved for compiler / v2 ABI: TOC save area + +16 LR save area + +8 CR save area + r1+0 stack back chain + */ +#if _CALL_ELF == 2 +# define FRAME_SIZE 752 +# define VR_RTN 608 +#else +# define FRAME_SIZE 592 +# define VR_RTN 560 +#endif +#define INT_RTN 512 +#define FPR_RTN 528 +#define STACK_FRAME 496 +#define CALLING_LR 488 +#define CALLING_SP 480 +#define INT_PARMS 112 +#define FPR_PARMS 176 +#define VR_PARMS 288 +#define VR_VRSAVE 284 + .section ".toc","aw" +.LC__dl_hwcap: +# ifdef SHARED + .tc _rtld_local_ro[TC],_rtld_local_ro +# else + .tc _dl_hwcap[TC],_dl_hwcap +# endif + .section ".text" + + .machine "altivec" +/* On entry r0 contains the index of the PLT entry we need to fixup + and r11 contains the link_map (from PLT0+16). The link_map becomes + parm1 (r3) and the index (r0) needs to be converted to an offset + (index * 24) in parm2 (r4). */ +#ifndef PROF +EALIGN(_dl_profile_resolve, 4, 0) +/* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we + need to call _dl_call_pltexit. */ + std r31,-8(r1) + std r30,-16(r1) +/* We need to save the registers used to pass parameters, ie. r3 thru + r10; the registers are saved in a stack frame. */ + stdu r1,-FRAME_SIZE(r1) + cfi_adjust_cfa_offset (FRAME_SIZE) + cfi_offset(r31,-8) + cfi_offset(r30,-16) + std r3,INT_PARMS+0(r1) + mr r3,r11 + std r4,INT_PARMS+8(r1) + sldi r4,r0,1 /* index * 2 */ + std r5,INT_PARMS+16(r1) + add r4,r4,r0 /* index * 3 */ + std r6,INT_PARMS+24(r1) + sldi r4,r4,3 /* index * 24 == PLT offset */ + mflr r5 + std r7,INT_PARMS+32(r1) + std r8,INT_PARMS+40(r1) +/* Store the LR in the LR Save area. */ + la r8,FRAME_SIZE(r1) + std r5,FRAME_SIZE+FRAME_LR_SAVE(r1) + cfi_offset (lr, FRAME_LR_SAVE) + std r5,CALLING_LR(r1) + std r9,INT_PARMS+48(r1) + std r10,INT_PARMS+56(r1) + std r8,CALLING_SP(r1) + ld r12,.LC__dl_hwcap@toc(r2) +#ifdef SHARED + /* Load _rtld_local_ro._dl_hwcap. */ + ld r12,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r12) +#else + ld r12,0(r12) /* Load extern _dl_hwcap. */ +#endif + andis. r0,r12,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(saveFP) + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) + li r11,32 + li r12,64 + stvx v2,0,r10 + stvx v3,0,r9 + + stvx v4,r11,r10 + stvx v5,r11,r9 + addi r11,r11,64 + + stvx v6,r12,r10 + stvx v7,r12,r9 + addi r12,r12,64 + + stvx v8,r11,r10 + stvx v9,r11,r9 + addi r11,r11,64 + + stvx v10,r12,r10 + stvx v11,r12,r9 + mfspr r0,VRSAVE + + stvx v12,r11,r10 + stvx v13,r11,r9 +L(saveFP): + stw r0,VR_VRSAVE(r1) +/* Save floating registers. */ + stfd fp1,FPR_PARMS+0(r1) + stfd fp2,FPR_PARMS+8(r1) + stfd fp3,FPR_PARMS+16(r1) + stfd fp4,FPR_PARMS+24(r1) + stfd fp5,FPR_PARMS+32(r1) + stfd fp6,FPR_PARMS+40(r1) + stfd fp7,FPR_PARMS+48(r1) + stfd fp8,FPR_PARMS+56(r1) + stfd fp9,FPR_PARMS+64(r1) + stfd fp10,FPR_PARMS+72(r1) + stfd fp11,FPR_PARMS+80(r1) + li r0,-1 + stfd fp12,FPR_PARMS+88(r1) + stfd fp13,FPR_PARMS+96(r1) +/* Load the extra parameters. */ + addi r6,r1,INT_PARMS + addi r7,r1,STACK_FRAME +/* Save link_map* and reloc_addr parms for later. */ + mr r31,r3 + mr r30,r4 + std r0,0(r7) + bl JUMPTARGET(_dl_profile_fixup) +#ifndef SHARED + nop +#endif +/* Test *framesizep > 0 to see if need to do pltexit processing. */ + ld r0,STACK_FRAME(r1) +/* Put the registers back. */ + lwz r12,VR_VRSAVE(r1) + cmpdi cr1,r0,0 + cmpdi cr0,r12,0 + bgt cr1,L(do_pltexit) + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) +/* VRSAVE must be non-zero if VMX is present and VRs are in use. */ + beq L(restoreFXR) + li r11,32 + li r12,64 + lvx v2,0,r10 + lvx v3,0,r9 + + lvx v4,r11,r10 + lvx v5,r11,r9 + addi r11,r11,64 + + lvx v6,r12,r10 + lvx v7,r12,r9 + addi r12,r12,64 + + lvx v8,r11,r10 + lvx v9,r11,r9 + addi r11,r11,64 + + lvx v10,r12,r10 + lvx v11,r12,r9 + + lvx v12,r11,r10 + lvx v13,r11,r9 +L(restoreFXR): + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +/* Prepare for calling the function returned by fixup. */ + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +#if _CALL_ELF == 2 +/* Restore the caller's TOC in case we jump to a local entry point. */ + ld r2,FRAME_SIZE+FRAME_TOC_SAVE(r1) +#endif +/* Load the floating point registers. */ + lfd fp1,FPR_PARMS+0(r1) + lfd fp2,FPR_PARMS+8(r1) + lfd fp3,FPR_PARMS+16(r1) + lfd fp4,FPR_PARMS+24(r1) + lfd fp5,FPR_PARMS+32(r1) + lfd fp6,FPR_PARMS+40(r1) + lfd fp7,FPR_PARMS+48(r1) + lfd fp8,FPR_PARMS+56(r1) + lfd fp9,FPR_PARMS+64(r1) + lfd fp10,FPR_PARMS+72(r1) + lfd fp11,FPR_PARMS+80(r1) + lfd fp12,FPR_PARMS+88(r1) + lfd fp13,FPR_PARMS+96(r1) +/* Unwind the stack frame, and jump. */ + ld r31,FRAME_SIZE-8(r1) + ld r30,FRAME_SIZE-16(r1) + addi r1,r1,FRAME_SIZE + bctr + +L(do_pltexit): + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) + beq L(restoreFXR2) + li r11,32 + li r12,64 + lvx v2,0,r10 + lvx v3,0,r9 + + lvx v4,r11,r10 + lvx v5,r11,r9 + addi r11,r11,64 + + lvx v6,r12,r10 + lvx v7,r12,r9 + addi r12,r12,64 + + lvx v8,r11,r10 + lvx v9,r11,r9 + addi r11,r11,64 + + lvx v10,r12,r10 + lvx v11,r12,r9 + + lvx v12,r11,r10 + lvx v13,r11,r9 +L(restoreFXR2): + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +/* Prepare for calling the function returned by fixup. */ + std r2,FRAME_TOC_SAVE(r1) + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +/* Load the floating point registers. */ + lfd fp1,FPR_PARMS+0(r1) + lfd fp2,FPR_PARMS+8(r1) + lfd fp3,FPR_PARMS+16(r1) + lfd fp4,FPR_PARMS+24(r1) + lfd fp5,FPR_PARMS+32(r1) + lfd fp6,FPR_PARMS+40(r1) + lfd fp7,FPR_PARMS+48(r1) + lfd fp8,FPR_PARMS+56(r1) + lfd fp9,FPR_PARMS+64(r1) + lfd fp10,FPR_PARMS+72(r1) + lfd fp11,FPR_PARMS+80(r1) + lfd fp12,FPR_PARMS+88(r1) + lfd fp13,FPR_PARMS+96(r1) +/* Call the target function. */ + bctrl + ld r2,FRAME_TOC_SAVE(r1) + lwz r12,VR_VRSAVE(r1) +/* But return here and store the return values. */ + std r3,INT_RTN(r1) + std r4,INT_RTN+8(r1) + stfd fp1,FPR_RTN+0(r1) + stfd fp2,FPR_RTN+8(r1) + cmpdi cr0,r12,0 + la r10,VR_RTN(r1) + stfd fp3,FPR_RTN+16(r1) + stfd fp4,FPR_RTN+24(r1) +#if _CALL_ELF == 2 + la r12,VR_RTN+16(r1) + stfd fp5,FPR_RTN+32(r1) + stfd fp6,FPR_RTN+40(r1) + li r5,32 + li r6,64 + stfd fp7,FPR_RTN+48(r1) + stfd fp8,FPR_RTN+56(r1) + stfd fp9,FPR_RTN+64(r1) + stfd fp10,FPR_RTN+72(r1) +#endif + mr r3,r31 + mr r4,r30 + beq L(callpltexit) + stvx v2,0,r10 +#if _CALL_ELF == 2 + stvx v3,0,r12 + stvx v4,r5,r10 + stvx v5,r5,r12 + addi r5,r5,64 + stvx v6,r6,r10 + stvx v7,r6,r12 + stvx v8,r5,r10 + stvx v9,r5,r12 +#endif +L(callpltexit): + addi r5,r1,INT_PARMS + addi r6,r1,INT_RTN + bl JUMPTARGET(_dl_call_pltexit) +#ifndef SHARED + nop +#endif +/* Restore the return values from target function. */ + lwz r12,VR_VRSAVE(r1) + ld r3,INT_RTN(r1) + ld r4,INT_RTN+8(r1) + lfd fp1,FPR_RTN+0(r1) + lfd fp2,FPR_RTN+8(r1) + cmpdi cr0,r12,0 + la r11,VR_RTN(r1) + lfd fp3,FPR_RTN+16(r1) + lfd fp4,FPR_RTN+24(r1) +#if _CALL_ELF == 2 + la r12,VR_RTN+16(r1) + lfd fp5,FPR_RTN+32(r1) + lfd fp6,FPR_RTN+40(r1) + li r30,32 + li r31,64 + lfd fp7,FPR_RTN+48(r1) + lfd fp8,FPR_RTN+56(r1) + lfd fp9,FPR_RTN+64(r1) + lfd fp10,FPR_RTN+72(r1) +#endif + beq L(pltexitreturn) + lvx v2,0,r11 +#if _CALL_ELF == 2 + lvx v3,0,r12 + lvx v4,r30,r11 + lvx v5,r30,r12 + addi r30,r30,64 + lvx v6,r31,r11 + lvx v7,r31,r12 + lvx v8,r30,r11 + lvx v9,r30,r12 +#endif +L(pltexitreturn): + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r31,FRAME_SIZE-8(r1) + ld r30,FRAME_SIZE-16(r1) + mtlr r0 + ld r1,0(r1) + blr +END(_dl_profile_resolve) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/entry.h b/REORG.TODO/sysdeps/powerpc/powerpc64/entry.h new file mode 100644 index 0000000000..9131d9ceb6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/entry.h @@ -0,0 +1,37 @@ +/* Finding the entry point and start of text. PowerPC64 version. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + + +#ifndef __ASSEMBLY__ +extern void _start (void); +#endif + +#define ENTRY_POINT _start + +#if _CALL_ELF != 2 +/* We have to provide a special declaration. */ +#define ENTRY_POINT_DECL(class) class void _start (void); + +/* Use the address of ._start as the lowest address for which we need + to keep profiling records. We can't copy the ia64 scheme as our + entry poiny address is really the address of the function + descriptor, not the actual function entry. */ +#define TEXT_START \ + ({ extern unsigned long int _start_as_data[] asm ("_start"); \ + _start_as_data[0]; }) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/ffsll.c b/REORG.TODO/sysdeps/powerpc/powerpc64/ffsll.c new file mode 100644 index 0000000000..ae18f127a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/ffsll.c @@ -0,0 +1,37 @@ +/* Find first set bit in a word, counted from least significant end. + For PowerPC. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund (tege@sics.se). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define ffsl __something_else +#include <limits.h> +#include <string.h> + +#undef ffs + +int +__ffsll (long long int x) +{ + int cnt; + + asm ("cntlzd %0,%1" : "=r" (cnt) : "r" (x & -x)); + return 64 - cnt; +} +weak_alias (__ffsll, ffsll) +#undef ffsl +weak_alias (__ffsll, ffsl) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile new file mode 100644 index 0000000000..317a988854 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile @@ -0,0 +1,44 @@ +ifeq ($(subdir),math) +sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \ + s_isnan-power5 s_isnan-ppc64 s_copysign-power6 \ + s_copysign-ppc64 s_finite-power7 s_finite-ppc64 \ + s_finitef-ppc64 s_isinff-ppc64 s_isinf-power7 \ + s_isinf-ppc64 s_modf-power5+ s_modf-ppc64 \ + s_modff-power5+ s_modff-ppc64 s_isnan-power8 \ + s_isinf-power8 s_finite-power8 + +libm-sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \ + s_isnan-power5 s_isnan-ppc64 s_llround-power6x \ + s_llround-power5+ s_llround-ppc64 s_ceil-power5+ \ + s_ceil-ppc64 s_ceilf-power5+ s_ceilf-ppc64 \ + s_floor-power5+ s_floor-ppc64 s_floorf-power5+ \ + s_floorf-ppc64 s_round-power5+ s_round-ppc64 \ + s_roundf-power5+ s_roundf-ppc64 s_trunc-power5+ \ + s_trunc-ppc64 s_truncf-power5+ s_truncf-ppc64 \ + s_copysign-power6 s_copysign-ppc64 s_llrint-power6x \ + s_llrint-ppc64 s_finite-power7 s_finite-ppc64 \ + s_finitef-ppc64 s_isinff-ppc64 s_isinf-power7 \ + s_isinf-ppc64 s_logb-power7 s_logbf-power7 \ + s_logbl-power7 s_logb-ppc64 s_logbf-ppc64 \ + s_logbl-ppc64 s_modf-power5+ s_modf-ppc64 \ + s_modff-power5+ s_modff-ppc64 e_hypot-ppc64 \ + e_hypot-power7 e_hypotf-ppc64 e_hypotf-power7 \ + s_isnan-power8 s_isinf-power8 s_finite-power8 \ + s_llrint-power8 s_llround-power8 \ + e_expf-power8 e_expf-ppc64 \ + s_sinf-ppc64 s_sinf-power8 \ + s_cosf-ppc64 s_cosf-power8 + +CFLAGS-s_logbf-power7.c = -mcpu=power7 +CFLAGS-s_logbl-power7.c = -mcpu=power7 +CFLAGS-s_logb-power7.c = -mcpu=power7 +CFLAGS-s_modf-power5+.c = -mcpu=power5+ +CFLAGS-s_modff-power5+.c = -mcpu=power5+ +CFLAGS-e_hypot-power7.c = -mcpu=power7 +CFLAGS-e_hypotf-power7.c = -mcpu=power7 + +# These files quiet sNaNs in a way that is optimized away without +# -fsignaling-nans. +CFLAGS-s_modf-ppc64.c += -fsignaling-nans +CFLAGS-s_modff-ppc64.c += -fsignaling-nans +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S new file mode 100644 index 0000000000..1e6cc51d9e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S @@ -0,0 +1,26 @@ +/* __ieee754_expf() POWER8 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_expf __ieee754_expf_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c new file mode 100644 index 0000000000..b236290ea2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c @@ -0,0 +1,24 @@ +/* __ieee_expf() PowerPC64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_expf __ieee754_expf_ppc64 + +#include <sysdeps/ieee754/flt-32/e_expf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c new file mode 100644 index 0000000000..577093675c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c @@ -0,0 +1,31 @@ +/* Multiple versions of ieee754_expf. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include "init-arch.h" + +extern __typeof (__ieee754_expf) __ieee754_expf_ppc64 attribute_hidden; +extern __typeof (__ieee754_expf) __ieee754_expf_power8 attribute_hidden; + +libc_ifunc (__ieee754_expf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __ieee754_expf_power8 + : __ieee754_expf_ppc64); + +strong_alias (__ieee754_expf, __expf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c new file mode 100644 index 0000000000..dbe9b33e2e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c @@ -0,0 +1,19 @@ +/* __ieee_hypot() POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c new file mode 100644 index 0000000000..baebb36ae3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c @@ -0,0 +1,26 @@ +/* __ieee_hypot() PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypot __ieee754_hypot_ppc64 + +#include <sysdeps/powerpc/fpu/e_hypot.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c new file mode 100644 index 0000000000..6a3d60a830 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c @@ -0,0 +1,32 @@ +/* Multiple versions of ieee754_hypot. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ieee754_hypot) __ieee754_hypot_ppc64 attribute_hidden; +extern __typeof (__ieee754_hypot) __ieee754_hypot_power7 attribute_hidden; + +libc_ifunc (__ieee754_hypot, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __ieee754_hypot_power7 + : __ieee754_hypot_ppc64); + +strong_alias (__ieee754_hypot, __hypot_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c new file mode 100644 index 0000000000..70584863f7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c @@ -0,0 +1,19 @@ +/* __ieee_hypotf() POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c new file mode 100644 index 0000000000..839e94e56c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c @@ -0,0 +1,26 @@ +/* __ieee_hypot() PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypotf __ieee754_hypotf_ppc64 + +#include <sysdeps/powerpc/fpu/e_hypotf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c new file mode 100644 index 0000000000..2c8112d3b2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c @@ -0,0 +1,32 @@ +/* Multiple versions of ieee754_hypot. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ieee754_hypotf) __ieee754_hypotf_ppc64 attribute_hidden; +extern __typeof (__ieee754_hypotf) __ieee754_hypotf_power7 attribute_hidden; + +libc_ifunc (__ieee754_hypotf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __ieee754_hypotf_power7 + : __ieee754_hypotf_ppc64); + +strong_alias (__ieee754_hypotf, __hypotf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-power5+.S new file mode 100644 index 0000000000..bbea647da7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-power5+.S @@ -0,0 +1,31 @@ +/* ceil function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __ceil __ceil_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-ppc64.S new file mode 100644 index 0000000000..c19bb42329 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-ppc64.S @@ -0,0 +1,31 @@ +/* ceil function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __ceil __ceil_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_ceil.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil.c new file mode 100644 index 0000000000..968e8cb17e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil.c @@ -0,0 +1,40 @@ +/* Multiple versions of ceil. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ceil) __ceil_ppc64 attribute_hidden; +extern __typeof (__ceil) __ceil_power5plus attribute_hidden; + +libc_ifunc (__ceil, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __ceil_power5plus + : __ceil_ppc64); + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +strong_alias (__ceil, __ceill) +weak_alias (__ceil, ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-power5+.S new file mode 100644 index 0000000000..8e875ce679 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-power5+.S @@ -0,0 +1,26 @@ +/* ceilf function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __ceilf __ceilf_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-ppc64.S new file mode 100644 index 0000000000..c9d31da2a7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-ppc64.S @@ -0,0 +1,26 @@ +/* ceilf function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __ceilf __ceilf_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_ceilf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf.c new file mode 100644 index 0000000000..7d4a028fe5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf.c @@ -0,0 +1,32 @@ +/* Multiple versions of ceilf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ceilf) __ceilf_ppc64 attribute_hidden; +extern __typeof (__ceilf) __ceilf_power5plus attribute_hidden; + +libc_ifunc (__ceilf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __ceilf_power5plus + : __ceilf_ppc64); + +weak_alias (__ceilf, ceilf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-power6.S new file mode 100644 index 0000000000..d59fbfae73 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-power6.S @@ -0,0 +1,33 @@ +/* copysign(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a, b, c, d) +#undef hidden_def +#define hidden_def(name) + +#define __copysign __copysign_power6 + +#include <sysdeps/powerpc/powerpc64/fpu/s_copysign.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-ppc64.S new file mode 100644 index 0000000000..3f4fbc96ad --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-ppc64.S @@ -0,0 +1,35 @@ +/* copysign(). PowerPC64 default version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a, b, c, d) + +#define __copysign __copysign_ppc64 +#undef hidden_def +#define hidden_def(name) \ + strong_alias (__copysign_ppc64, __GI___copysign) + +#include <sysdeps/powerpc/powerpc64/fpu/s_copysign.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign.c new file mode 100644 index 0000000000..2bfb625bf7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign.c @@ -0,0 +1,51 @@ +/* Multiple versions of copysign. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Redefine copysign so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias below. */ +#undef __copysign +#define __copysign __redirect_copysign +#include <math.h> +#include <math_ldbl_opt.h> +#undef __copysign +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__redirect_copysign) __copysign_ppc64 attribute_hidden; +extern __typeof (__redirect_copysign) __copysign_power6 attribute_hidden; + +extern __typeof (__redirect_copysign) __libm_copysign; +libc_ifunc (__libm_copysign, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __copysign_power6 + : __copysign_ppc64); + +strong_alias (__libm_copysign, __copysign) +weak_alias (__copysign, copysign) + +#ifdef NO_LONG_DOUBLE +weak_alias (__copysign,copysignl) +strong_alias(__copysign,__copysignl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __copysign, copysignl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __copysign, copysignl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysignf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysignf.c new file mode 100644 index 0000000000..c9be2b6811 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysignf.c @@ -0,0 +1,32 @@ +/* Multiple versions of copysignf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +/* It's safe to use double-precision implementation for single-precision. */ +extern __typeof (__copysignf) __copysign_ppc64 attribute_hidden; +extern __typeof (__copysignf) __copysign_power6 attribute_hidden; + +libc_ifunc (__copysignf, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __copysign_power6 + : __copysign_ppc64); + +weak_alias (__copysignf, copysignf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-power8.S new file mode 100644 index 0000000000..ee00a2c43a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-power8.S @@ -0,0 +1,26 @@ +/* cosf function. PowerPC64/power8 version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __cosf __cosf_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-ppc64.c new file mode 100644 index 0000000000..635624c538 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-ppc64.c @@ -0,0 +1,26 @@ +/* cosf function. PowerPC64 default version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __cosf __cosf_ppc64 + +#include <sysdeps/powerpc/fpu/s_cosf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf.c new file mode 100644 index 0000000000..acf2a59d69 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf.c @@ -0,0 +1,31 @@ +/* Multiple versions of cosf. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__cosf) __cosf_ppc64 attribute_hidden; +extern __typeof (__cosf) __cosf_power8 attribute_hidden; + +libc_ifunc (__cosf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __cosf_power8 + : __cosf_ppc64); + +weak_alias (__cosf, cosf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power7.S new file mode 100644 index 0000000000..9220383ee6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power7.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __finite __finite_power7 + +#include <sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S new file mode 100644 index 0000000000..fa878ab3e1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER7 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __finite __finite_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-ppc64.c new file mode 100644 index 0000000000..fabd9b0e3d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-ppc64.c @@ -0,0 +1,34 @@ +/* finite(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define FINITE __finite_ppc64 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__finite_ppc64, __GI___finite, __finite_ppc64); +#endif + +#include <sysdeps/ieee754/dbl-64/s_finite.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c new file mode 100644 index 0000000000..a5ec36b72f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c @@ -0,0 +1,60 @@ +/* Multiple versions of finite. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __finite __redirect___finite +#define __finitef __redirect___finitef +#define __finitel __redirect___finitel +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__finite) __finite_ppc64 attribute_hidden; +extern __typeof (__finite) __finite_power7 attribute_hidden; +extern __typeof (__finite) __finite_power8 attribute_hidden; +#undef __finite +#undef __finitef +#undef __finitel + +libc_ifunc_redirected (__redirect___finite, __finite, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __finite_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __finite_power7 + : __finite_ppc64); + +weak_alias (__finite, finite) + +#ifdef NO_LONG_DOUBLE +strong_alias (__finite, __finitel) +weak_alias (__finite, finitel) +#endif + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, finite, finitel, GLIBC_2_0); +# endif +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1) +compat_symbol (libm, __finite, __finitel, GLIBC_2_1); +# endif +#else +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __finite, __finitel, GLIBC_2_0); +compat_symbol (libc, finite, finitel, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef-ppc64.c new file mode 100644 index 0000000000..c6f8033e6e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef-ppc64.c @@ -0,0 +1,32 @@ +/* finitef(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define FINITEF __finitef_ppc64 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__finitef_ppc64, __GI___finitef, __finitef_ppc64); +#endif + +#include <sysdeps/ieee754/flt-32/s_finitef.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c new file mode 100644 index 0000000000..cdd7824efb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c @@ -0,0 +1,37 @@ +/* Multiple versions of finitef. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __finitef __redirect___finitef +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__finitef) __finitef_ppc64 attribute_hidden; +/* The double-precision version also works for single-precision. */ +extern __typeof (__finitef) __finite_power7 attribute_hidden; +extern __typeof (__finitef) __finite_power8 attribute_hidden; +#undef __finitef + +libc_ifunc_redirected (__redirect___finitef, __finitef, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __finite_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __finite_power7 + : __finitef_ppc64); + +weak_alias (__finitef, finitef) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-power5+.S new file mode 100644 index 0000000000..24f2460693 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-power5+.S @@ -0,0 +1,31 @@ +/* floor function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __floor __floor_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-ppc64.S new file mode 100644 index 0000000000..5ec9a33d89 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-ppc64.S @@ -0,0 +1,31 @@ +/* floor function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __floor __floor_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_floor.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor.c new file mode 100644 index 0000000000..6ab7a35490 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor.c @@ -0,0 +1,40 @@ +/* Multiple versions of floor. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__floor) __floor_ppc64 attribute_hidden; +extern __typeof (__floor) __floor_power5plus attribute_hidden; + +libc_ifunc (__floor, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __floor_power5plus + : __floor_ppc64); + +weak_alias (__floor, floor) + +#ifdef NO_LONG_DOUBLE +strong_alias (__floor, __floorl) +weak_alias (__floor, floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-power5+.S new file mode 100644 index 0000000000..8b621de68e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-power5+.S @@ -0,0 +1,26 @@ +/* floorf function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __floorf __floorf_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-ppc64.S new file mode 100644 index 0000000000..3feea6e162 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-ppc64.S @@ -0,0 +1,27 @@ +/* floorf function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __floorf __floorf_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_floorf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf.c new file mode 100644 index 0000000000..ee96536247 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf.c @@ -0,0 +1,32 @@ +/* Multiple versions of floorf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__floorf) __floorf_ppc64 attribute_hidden; +extern __typeof (__floorf) __floorf_power5plus attribute_hidden; + +libc_ifunc (__floorf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __floorf_power5plus + : __floorf_ppc64); + +weak_alias (__floorf, floorf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power7.S new file mode 100644 index 0000000000..33a7e3de1a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power7.S @@ -0,0 +1,33 @@ +/* isinf(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __isinf __isinf_power7 + +#include <sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power8.S new file mode 100644 index 0000000000..b630696927 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power8.S @@ -0,0 +1,33 @@ +/* isinf(). PowerPC64/POWER8 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __isinf __isinf_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-ppc64.c new file mode 100644 index 0000000000..e7f64438b4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-ppc64.c @@ -0,0 +1,33 @@ +/* isinf(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define __isinf __isinf_ppc64 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__isinf_ppc64, __GI___isinf, __isinf_ppc64); +#endif + +#include <sysdeps/ieee754/dbl-64/s_isinf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c new file mode 100644 index 0000000000..9c6789c7bd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c @@ -0,0 +1,53 @@ +/* Multiple versions of isinf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __isinf __redirect___isinf +#define __isinff __redirect___isinff +#define __isinfl __redirect___isinfl +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isinf) __isinf_ppc64 attribute_hidden; +extern __typeof (__isinf) __isinf_power7 attribute_hidden; +extern __typeof (__isinf) __isinf_power8 attribute_hidden; +#undef __isinf +#undef __isinff +#undef __isinfl + +libc_ifunc_redirected (__redirect___isinf, __isinf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isinf_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isinf_power7 + : __isinf_ppc64); + +weak_alias (__isinf, isinf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isinf, __isinfl) +weak_alias (__isinf, isinfl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0); +compat_symbol (libc, isinf, isinfl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff-ppc64.c new file mode 100644 index 0000000000..e58e0b53be --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff-ppc64.c @@ -0,0 +1,31 @@ +/* isinff(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __isinff __isinff_ppc64 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__isinff_ppc64, __GI___isinff, __isinff_ppc64); +#endif + +#include <sysdeps/ieee754/flt-32/s_isinff.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c new file mode 100644 index 0000000000..439e0b80d1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c @@ -0,0 +1,38 @@ +/* Multiple versions of isinf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __isinff __redirect___isinff +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isinff) __isinff_ppc64 attribute_hidden; +/* The double-precision version also works for single-precision. */ +extern __typeof (__isinff) __isinf_power7 attribute_hidden; +extern __typeof (__isinff) __isinf_power8 attribute_hidden; +#undef __isinff + +libc_ifunc_redirected (__redirect___isinff, __isinff, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isinf_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isinf_power7 + : __isinff_ppc64); + +weak_alias (__isinff, isinff) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power5.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power5.S new file mode 100644 index 0000000000..18d368a63b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power5.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER5 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power5 + +#include <sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6.S new file mode 100644 index 0000000000..7f0eae0430 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER6 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power6 + +#include <sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6x.S new file mode 100644 index 0000000000..aa283096ae --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6x.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER6X version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power6x + +#include <sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power7.S new file mode 100644 index 0000000000..b67d58e2ea --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power7.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power7 + +#include <sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S new file mode 100644 index 0000000000..03151b3087 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER7 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-ppc64.S new file mode 100644 index 0000000000..ee219c14be --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-ppc64.S @@ -0,0 +1,32 @@ +/* isnan(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) + +#define __isnan __isnan_ppc64 +#undef hidden_def +#define hidden_def(name) \ + .globl __GI___isnan ; .set __GI___isnan,__isnan_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan.c new file mode 100644 index 0000000000..3cfe1793da --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan.c @@ -0,0 +1,62 @@ +/* Multiple versions of isnan. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __isnan __redirect___isnan +#define __isnanf __redirect___isnanf +#define __isnanl __redirect___isnanl +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isnan) __isnan_ppc64 attribute_hidden; +extern __typeof (__isnan) __isnan_power5 attribute_hidden; +extern __typeof (__isnan) __isnan_power6 attribute_hidden; +extern __typeof (__isnan) __isnan_power6x attribute_hidden; +extern __typeof (__isnan) __isnan_power7 attribute_hidden; +extern __typeof (__isnan) __isnan_power8 attribute_hidden; +#undef __isnan +#undef __isnanf +#undef __isnanl + +libc_ifunc_redirected (__redirect___isnan, __isnan, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isnan_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isnan_power7 + : (hwcap & PPC_FEATURE_POWER6_EXT) + ? __isnan_power6x + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __isnan_power6 + : (hwcap & PPC_FEATURE_POWER5) + ? __isnan_power5 + : __isnan_ppc64); + +weak_alias (__isnan, isnan) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c new file mode 100644 index 0000000000..958c373245 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c @@ -0,0 +1,44 @@ +/* Multiple versions of isnan. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +/* The double-precision implementation also works for the single one. */ +extern __typeof (__isnanf) __isnan_ppc64 attribute_hidden; +extern __typeof (__isnanf) __isnan_power5 attribute_hidden; +extern __typeof (__isnanf) __isnan_power6 attribute_hidden; +extern __typeof (__isnanf) __isnan_power6x attribute_hidden; +extern __typeof (__isnanf) __isnan_power7 attribute_hidden; +extern __typeof (__isnanf) __isnan_power8 attribute_hidden; + +libc_ifunc_hidden (__isnanf, __isnanf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isnan_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isnan_power7 + : (hwcap & PPC_FEATURE_POWER6_EXT) + ? __isnan_power6x + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __isnan_power6 + : (hwcap & PPC_FEATURE_POWER5) + ? __isnan_power5 + : __isnan_ppc64); + +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power6x.S new file mode 100644 index 0000000000..f9b1616be3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power6x.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC64/POWER6X default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_power6x + +#include <sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power8.S new file mode 100644 index 0000000000..b7f5276a66 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power8.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC64/POWER6X default version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-ppc64.S new file mode 100644 index 0000000000..b92dafbcdb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-ppc64.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c new file mode 100644 index 0000000000..8db494cfde --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c @@ -0,0 +1,60 @@ +/* Multiple versions of llrint. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Redefine lrint/__lrint so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias below. */ +#define lrint __hidden_lrint +#define __lrint __hidden___lrint + +#include <math.h> +#include <math_ldbl_opt.h> +#undef lrint +#undef __lrint +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__llrint) __llrint_ppc64 attribute_hidden; +extern __typeof (__llrint) __llrint_power6x attribute_hidden; +extern __typeof (__llrint) __llrint_power8 attribute_hidden; + +libc_ifunc (__llrint, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __llrint_power8 : + (hwcap & PPC_FEATURE_POWER6_EXT) + ? __llrint_power6x + : __llrint_ppc64); + +weak_alias (__llrint, llrint) +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1); +#endif + +/* long has the same width as long long on PowerPC64. */ +strong_alias (__llrint, __lrint) +weak_alias (__lrint, lrint) +#ifdef NO_LONG_DOUBLE +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power5+.S new file mode 100644 index 0000000000..b8305ce968 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power5+.S @@ -0,0 +1,32 @@ +/* llround(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llround __llround_power5plus +#define __lround __lround_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power6x.S new file mode 100644 index 0000000000..1145aff2a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power6x.S @@ -0,0 +1,32 @@ +/* llround(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __llround __llround_power6x +#define __lround __lround_power6x + +#include <sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S new file mode 100644 index 0000000000..8d6190df89 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S @@ -0,0 +1,31 @@ +/* llround(). PowerPC64 default version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __llround __llround_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-ppc64.S new file mode 100644 index 0000000000..8e6f9aed78 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-ppc64.S @@ -0,0 +1,28 @@ +/* llround(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llround __llround_ppc64 +#define __lround __lround_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c new file mode 100644 index 0000000000..cb1a446158 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c @@ -0,0 +1,63 @@ +/* Multiple versions of llround. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define lround __hidden_lround +#define __lround __hidden___lround + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__llround) __llround_ppc64 attribute_hidden; +extern __typeof (__llround) __llround_power5plus attribute_hidden; +extern __typeof (__llround) __llround_power6x attribute_hidden; +extern __typeof (__llround) __llround_power8 attribute_hidden; + +libc_ifunc (__llround, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __llround_power8 : + (hwcap & PPC_FEATURE_POWER6_EXT) + ? __llround_power6x : + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __llround_power5plus + : __llround_ppc64); + +weak_alias (__llround, llround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1); +compat_symbol (libm, llround, lroundl, GLIBC_2_1); +#endif + +/* long has the same width as long long on PPC64. */ +#undef lround +#undef __lround +strong_alias (__llround, __lround) +weak_alias (__llround, lround) +#ifdef NO_LONG_DOUBLE +strong_alias (__llround, __llroundl) +weak_alias (__llround, llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-power7.c new file mode 100644 index 0000000000..73b5e2d48c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-power7.c @@ -0,0 +1,19 @@ +/* logb(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-ppc64.c new file mode 100644 index 0000000000..e428b9a29a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-ppc64.c @@ -0,0 +1,28 @@ +/* logb(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define __logb __logb_ppc64 + +#include <sysdeps/ieee754/dbl-64/s_logb.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb.c new file mode 100644 index 0000000000..d70919e3d4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb.c @@ -0,0 +1,41 @@ +/* Multiple versions of logb. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logb) __logb_ppc64 attribute_hidden; +extern __typeof (__logb) __logb_power7 attribute_hidden; + +libc_ifunc (__logb, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logb_power7 + : __logb_ppc64); + +weak_alias (__logb, logb) + +#ifdef NO_LONG_DOUBLE +strong_alias (__logb, __logbl) +weak_alias (__logb, logbl) +#endif + +#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, logb, logbl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-power7.c new file mode 100644 index 0000000000..02e04318e5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-power7.c @@ -0,0 +1,19 @@ +/* logb(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-ppc64.c new file mode 100644 index 0000000000..147b710c73 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-ppc64.c @@ -0,0 +1,26 @@ +/* logbf(). PowerPC64 default implementation. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __logbf __logbf_ppc64 + +#include <sysdeps/ieee754/flt-32/s_logbf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf.c new file mode 100644 index 0000000000..1cacc8a950 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf.c @@ -0,0 +1,32 @@ +/* Multiple versions of logbf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logbf) __logbf_ppc64 attribute_hidden; +extern __typeof (__logbf) __logbf_power7 attribute_hidden; + +libc_ifunc (__logbf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logbf_power7 + : __logbf_ppc64); + +weak_alias (__logbf, logbf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-power7.c new file mode 100644 index 0000000000..60ec533b8e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-power7.c @@ -0,0 +1,19 @@ +/* logb(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-ppc64.c new file mode 100644 index 0000000000..502410f877 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-ppc64.c @@ -0,0 +1,21 @@ +/* logbl(). PowerPC64/POWER7 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __logbl __logbl_ppc64 + +#include <sysdeps/ieee754/ldbl-128ibm/s_logbl.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl.c new file mode 100644 index 0000000000..63b9c812e2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl.c @@ -0,0 +1,32 @@ +/* Multiple versions of logbl. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logbl) __logbl_ppc64 attribute_hidden; +extern __typeof (__logbl) __logbl_power7 attribute_hidden; + +libc_ifunc (__logbl, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logbl_power7 + : __logbl_ppc64); + +long_double_symbol (libm, __logbl, logbl); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lrint.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lrint.c new file mode 100644 index 0000000000..d09286267b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lrint.c @@ -0,0 +1 @@ + /* __lrint is in s_llrint.c */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lround.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lround.c new file mode 100644 index 0000000000..0dab5443e2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lround.c @@ -0,0 +1 @@ +/* __lround is in s_llround.c */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-power5+.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-power5+.c new file mode 100644 index 0000000000..c923f84d97 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-power5+.c @@ -0,0 +1,19 @@ +/* PowerPC/POWER5+ implementation for modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-ppc64.c new file mode 100644 index 0000000000..43318ee4dd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-ppc64.c @@ -0,0 +1,29 @@ +/* PowerPC64 default implementation for modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) + +#define __modf __modf_ppc64 + +#include <sysdeps/ieee754/dbl-64/s_modf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf.c new file mode 100644 index 0000000000..3e79b2bd5a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf.c @@ -0,0 +1,44 @@ +/* Multiple versions of modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__modf) __modf_ppc64 attribute_hidden; +extern __typeof (__modf) __modf_power5plus attribute_hidden; + +libc_ifunc (__modf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __modf_power5plus + : __modf_ppc64); + +weak_alias (__modf, modf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__modf, __modfl) +weak_alias (__modf, modfl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __modf, modfl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __modf, modfl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-power5+.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-power5+.c new file mode 100644 index 0000000000..22dbf5341e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-power5+.c @@ -0,0 +1,19 @@ +/* PowerPC/POWER5+ implementation for modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-ppc64.c new file mode 100644 index 0000000000..6fc97f0114 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-ppc64.c @@ -0,0 +1,26 @@ +/* PowerPC64 default implementation for modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __modff __modff_ppc64 + +#include <sysdeps/ieee754/flt-32/s_modff.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff.c new file mode 100644 index 0000000000..f57939cc66 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff.c @@ -0,0 +1,30 @@ +/* Multiple versions of modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +extern __typeof (__modff) __modff_ppc64 attribute_hidden; +extern __typeof (__modff) __modff_power5plus attribute_hidden; + +libc_ifunc (__modff, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __modff_power5plus + : __modff_ppc64); + +weak_alias (__modff, modff) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-power5+.S new file mode 100644 index 0000000000..a7c7492f21 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-power5+.S @@ -0,0 +1,31 @@ +/* round function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __round __round_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-ppc64.S new file mode 100644 index 0000000000..44a2b0105a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-ppc64.S @@ -0,0 +1,31 @@ +/* round function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __round __round_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_round.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round.c new file mode 100644 index 0000000000..d440f6f45c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round.c @@ -0,0 +1,40 @@ +/* Multiple versions of round. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__round) __round_ppc64 attribute_hidden; +extern __typeof (__round) __round_power5plus attribute_hidden; + +libc_ifunc (__round, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __round_power5plus + : __round_ppc64); + +weak_alias (__round, round) + +#ifdef NO_LONG_DOUBLE +strong_alias (__round, __roundl) +weak_alias (__round, roundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __round, roundl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-power5+.S new file mode 100644 index 0000000000..81501a1547 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-power5+.S @@ -0,0 +1,26 @@ +/* roundf function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __roundf __roundf_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-ppc64.S new file mode 100644 index 0000000000..8f3b24c556 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-ppc64.S @@ -0,0 +1,26 @@ +/* roundf function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __roundf __roundf_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_roundf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf.c new file mode 100644 index 0000000000..09609d3e91 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf.c @@ -0,0 +1,32 @@ +/* Multiple versions of roundf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__roundf) __roundf_ppc64 attribute_hidden; +extern __typeof (__roundf) __roundf_power5plus attribute_hidden; + +libc_ifunc (__roundf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __roundf_power5plus + : __roundf_ppc64); + +weak_alias (__roundf, roundf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S new file mode 100644 index 0000000000..3d01533da8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S @@ -0,0 +1,26 @@ +/* sinf(). PowerPC64/POWER8 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __sinf __sinf_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c new file mode 100644 index 0000000000..83e37f92c6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c @@ -0,0 +1,26 @@ +/* sinf(). PowerPC64 default version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __sinf __sinf_ppc64 + +#include <sysdeps/ieee754/flt-32/s_sinf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c new file mode 100644 index 0000000000..6d7d6ce50d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c @@ -0,0 +1,31 @@ +/* Multiple versions of sinf. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__sinf) __sinf_ppc64 attribute_hidden; +extern __typeof (__sinf) __sinf_power8 attribute_hidden; + +libc_ifunc (__sinf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __sinf_power8 + : __sinf_ppc64); + +weak_alias (__sinf, sinf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-power5+.S new file mode 100644 index 0000000000..53d8cd5013 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-power5+.S @@ -0,0 +1,31 @@ +/* trunc function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __trunc __trunc_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-ppc64.S new file mode 100644 index 0000000000..36e8fd05c2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-ppc64.S @@ -0,0 +1,31 @@ +/* trunc function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __trunc __trunc_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_trunc.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc.c new file mode 100644 index 0000000000..54844d5ff2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc.c @@ -0,0 +1,40 @@ +/* Multiple versions of trunc. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__trunc) __trunc_ppc64 attribute_hidden; +extern __typeof (__trunc) __trunc_power5plus attribute_hidden; + +libc_ifunc (__trunc, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __trunc_power5plus + : __trunc_ppc64); + +weak_alias (__trunc, trunc) + +#ifdef NO_LONG_DOUBLE +strong_alias (__trunc, __truncl) +weak_alias (__trunc, truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __trunc, truncl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-power5+.S new file mode 100644 index 0000000000..e28de7cb1e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-power5+.S @@ -0,0 +1,26 @@ +/* truncf function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __truncf __truncf_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-ppc64.S new file mode 100644 index 0000000000..b60242d83b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-ppc64.S @@ -0,0 +1,26 @@ +/* truncf function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __truncf __truncf_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_truncf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf.c new file mode 100644 index 0000000000..2c46525235 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf.c @@ -0,0 +1,32 @@ +/* Multiple versions of truncf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__truncf) __truncf_ppc64 attribute_hidden; +extern __typeof (__truncf) __truncf_power5plus attribute_hidden; + +libc_ifunc (__truncf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __truncf_power5plus + : __truncf_ppc64); + +weak_alias (__truncf, truncf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceil.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceil.S new file mode 100644 index 0000000000..78d7feefed --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceil.S @@ -0,0 +1,72 @@ +/* ceil function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__ceil, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,2 /* Set rounding mode toward +inf. */ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__ceil) + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +weak_alias (__ceil, ceill) +strong_alias (__ceil, __ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S new file mode 100644 index 0000000000..bc5ab02cb0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S @@ -0,0 +1,66 @@ +/* float ceil function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +EALIGN (__ceilf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,2 /* Set rounding mode toward +inf. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__ceilf) + +weak_alias (__ceilf, ceilf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysign.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysign.S new file mode 100644 index 0000000000..59472816c7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysign.S @@ -0,0 +1,59 @@ +/* Copy a sign bit between floating-point values. PowerPC64 version. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__copysign) + CALL_MCOUNT 0 +/* double [f1] copysign (double [f1] x, double [f2] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + stfd fp2,-8(r1) + nop + nop + nop + ld r3,-8(r1) + cmpdi r3,0 + blt L(0) + fabs fp1,fp1 + blr +L(0): fnabs fp1,fp1 + blr + END (__copysign) + +weak_alias (__copysign,copysign) + +/* It turns out that it's safe to use this code even for single-precision. */ +weak_alias (__copysign,copysignf) +strong_alias(__copysign,__copysignf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__copysign,copysignl) +strong_alias(__copysign,__copysignl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __copysign, copysignl, GLIBC_2_0) +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __copysign, copysignl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S new file mode 100644 index 0000000000..e05438ae7d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S @@ -0,0 +1 @@ +/* __copysignf is in s_copysign.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignl.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignl.S new file mode 100644 index 0000000000..b33ea6e256 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignl.S @@ -0,0 +1,48 @@ +/* Copy a sign bit between floating-point values. + IBM extended format long double version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__copysignl) +/* long double [f1,f2] copysign (long double [f1,f2] x, long double [f3,f4] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + stfd fp3,-16(r1) + ld r3,-16(r1) + cmpdi r3,0 + blt L(0) + fmr fp0,fp1 + fabs fp1,fp1 + fneg fp3,fp2 + fsel fp2,fp0,fp2,fp3 + blr +L(0): + fmr fp0,fp1 + fnabs fp1,fp1 + fneg fp3,fp2 + fsel fp2,fp0,fp3,fp2 + blr +END (__copysignl) + +#if IS_IN (libm) +long_double_symbol (libm, __copysignl, copysignl) +#else +long_double_symbol (libc, __copysignl, copysignl) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabs.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabs.S new file mode 100644 index 0000000000..53d21301ee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabs.S @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fabs.S> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __fabs, fabsl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabsl.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabsl.S new file mode 100644 index 0000000000..7603abba5d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabsl.S @@ -0,0 +1,34 @@ +/* Copy a sign bit between floating-point values. + IBM extended format long double version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__fabsl) +/* long double [f1,f2] fabs (long double [f1,f2] x); + fabs(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + fmr fp0,fp1 + fabs fp1,fp1 + fneg fp3,fp2 + fsel fp2,fp0,fp2,fp3 + blr +END (__fabsl) + +long_double_symbol (libm, __fabsl, fabsl) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floor.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floor.S new file mode 100644 index 0000000000..4a6cc0ebba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floor.S @@ -0,0 +1,72 @@ +/* Floor function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__floor, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,3 /* Set rounding mode toward -inf. */ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__floor) + +weak_alias (__floor, floor) + +#ifdef NO_LONG_DOUBLE +weak_alias (__floor, floorl) +strong_alias (__floor, __floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floorf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floorf.S new file mode 100644 index 0000000000..d8b5e21248 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floorf.S @@ -0,0 +1,66 @@ +/* float Floor function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +EALIGN (__floorf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,3 /* Set rounding mode toward -inf. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__floorf) + +weak_alias (__floorf, floorf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fma.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fma.S new file mode 100644 index 0000000000..d40695c633 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fma.S @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fma.S> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fma, fmal, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_isnan.S new file mode 100644 index 0000000000..6cba2d4408 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_isnan.S @@ -0,0 +1,56 @@ +/* isnan(). PowerPC64 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power4 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + mffs fp0 + mtfsb0 4*cr6+lt /* reset_fpscr_bit (FPSCR_VE) */ + fcmpu cr7,fp1,fp1 + mtfsf 255,fp0 + li r3,0 + beqlr+ cr7 /* (x == x) then not a NAN */ + li r3,1 /* else must be a NAN */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrint.S new file mode 100644 index 0000000000..39e765434a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrint.S @@ -0,0 +1,47 @@ +/* Round double to long int. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long long int[r3] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT 0 + fctid fp13,fp1 + stfd fp13,-16(r1) + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop + ld r3,-16(r1) + blr + END (__llrint) + +strong_alias (__llrint, __lrint) +weak_alias (__llrint, llrint) +weak_alias (__lrint, lrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S new file mode 100644 index 0000000000..4050be6437 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S @@ -0,0 +1,36 @@ +/* Round double to long int. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* long long int[r3] __llrintf (float x[fp1]) */ +ENTRY (__llrintf) + CALL_MCOUNT 0 + fctid fp13,fp1 + stfd fp13,-16(r1) + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop + ld r3,-16(r1) + blr + END (__llrintf) + +strong_alias (__llrintf, __lrintf) +weak_alias (__llrintf, llrintf) +weak_alias (__lrintf, lrintf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llround.S new file mode 100644 index 0000000000..0803ba1eb3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llround.S @@ -0,0 +1,96 @@ +/* llround function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2^52 */ + .tc FD_43300000_0[TC],0x4330000000000000 +.LC1: /* 0.5 */ + .tc FD_3fe00000_0[TC],0x3fe0000000000000 + .section ".text" + +/* long long [r3] llround (double x [fp1]) + IEEE 1003.1 llround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "round to Nearest" mode. Instead we set + "round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. + + It is necessary to detect when x is (+-)0x1.fffffffffffffp-2 + because adding +-0.5 in this case will cause an erroneous shift, + carry and round. We simply return 0 if 0.5 > x > -0.5. Likewise + if x is and odd number between +-(2^52 and 2^53-1) a shift and + carry will erroneously round if biased with +-0.5. Therefore if x + is greater/less than +-2^52 we don't need to bias the number with + +-0.5. */ + +ENTRY (__llround) + CALL_MCOUNT 0 + lfd fp9,.LC0@toc(2) /* Load 2^52 into fpr9. */ + lfd fp10,.LC1@toc(2)/* Load 0.5 into fpr10. */ + fabs fp2,fp1 /* Get the absolute value of x. */ + fsub fp12,fp10,fp10 /* Compute 0.0 into fp12. */ + fcmpu cr6,fp2,fp10 /* if |x| < 0.5 */ + fcmpu cr7,fp2,fp9 /* if |x| >= 2^52 */ + fcmpu cr1,fp1,fp12 /* x is negative? x < 0.0 */ + blt- cr6,.Lretzero /* 0.5 > x < -0.5 so just return 0. */ + bge- cr7,.Lnobias /* 2^52 > x < -2^52 just convert with no bias. */ + /* Test whether an integer to avoid spurious "inexact". */ + fadd fp3,fp2,fp9 + fsub fp3,fp3,fp9 + fcmpu cr5,fp2,fp3 + beq cr5,.Lnobias + fadd fp3,fp2,fp10 /* |x|+=0.5 bias to prepare to round. */ + bge cr1,.Lconvert /* x is positive so don't negate x. */ + fnabs fp3,fp3 /* -(|x|+=0.5) */ +.Lconvert: + fctidz fp4,fp3 /* Convert to Integer double word round toward 0. */ + stfd fp4,-16(r1) + nop + nop + nop + ld r3,-16(r1) /* Load return as integer. */ +.Lout: + blr +.Lretzero: /* 0.5 > x > -0.5 */ + li r3,0 /* return 0. */ + b .Lout +.Lnobias: + fmr fp3,fp1 + b .Lconvert + END (__llround) + +strong_alias (__llround, __lround) +weak_alias (__llround, llround) +weak_alias (__lround, lround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S new file mode 100644 index 0000000000..3e910ac322 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S @@ -0,0 +1,88 @@ +/* llroundf function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" +.LC0: /* 2^23 */ + .tc FD_41600000_0[TC],0x4160000000000000 +.LC1: /* 0.5 */ + .tc FD_3fe00000_0[TC],0x3fe0000000000000 +.LC2: /* 2^52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +/* long long [r3] llroundf (float x [fp1]) + IEEE 1003.1 llroundf function. IEEE specifies "roundf to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "roundf to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "round to Nearest" mode. Instead we set + "round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. + + It is necessary to detect when x is (+-)0x1.fffffffffffffp-2 + because adding +-0.5 in this case will cause an erroneous shift, + carry and round. We simply return 0 if 0.5 > x > -0.5. Likewise + if x is and odd number between +-(2^23 and 2^24-1) a shift and + carry will erroneously round if biased with +-0.5. Therefore if x + is greater/less than +-2^23 we don't need to bias the number with + +-0.5. */ + +ENTRY (__llroundf) + CALL_MCOUNT 0 + lfd fp9,.LC0@toc(2) /* Load 2^23 into fpr9. */ + lfd fp10,.LC1@toc(2)/* Load 0.5 into fpr10. */ + lfd fp11,.LC2@toc(2) /* Load 2^52 into fpr11. */ + fabs fp2,fp1 /* Get the absolute value of x. */ + fsub fp12,fp10,fp10 /* Compute 0.0 into fp12. */ + fcmpu cr6,fp2,fp10 /* if |x| < 0.5 */ + fcmpu cr7,fp2,fp9 /* if |x| >= 2^23 */ + fcmpu cr1,fp1,fp12 /* x is negative? x < 0.0 */ + blt- cr6,.Lretzero /* 0.5 > x < -0.5 so just return 0. */ + bge- cr7,.Lnobias /* 2^23 > x < -2^23 just convert with no bias. */ + /* Test whether an integer to avoid spurious "inexact". */ + fadd fp3,fp2,fp11 + fsub fp3,fp3,fp11 + fcmpu cr5,fp2,fp3 + beq cr5,.Lnobias + fadd fp3,fp2,fp10 /* |x|+=0.5 bias to prepare to round. */ + bge cr1,.Lconvert /* x is positive so don't negate x. */ + fnabs fp3,fp3 /* -(|x|+=0.5) */ +.Lconvert: + fctidz fp4,fp3 /* Convert to Integer double word round toward 0. */ + stfd fp4,-16(r1) + nop + nop + nop + ld r3,-16(r1) /* Load return as integer. */ +.Lout: + blr +.Lretzero: /* 0.5 > x > -0.5 */ + li r3,0 /* return 0. */ + b .Lout +.Lnobias: + fmr fp3,fp1 + b .Lconvert + END (__llroundf) + +strong_alias (__llroundf, __lroundf) +weak_alias (__llroundf, llroundf) +weak_alias (__lroundf, lroundf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lrint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lrint.S new file mode 100644 index 0000000000..d3c2fff581 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lrint.S @@ -0,0 +1 @@ +/* __lrint is in s_llrint.c */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lround.S new file mode 100644 index 0000000000..4306c405c4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lround.S @@ -0,0 +1 @@ +/* __lround is in s_llround.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S new file mode 100644 index 0000000000..6b2a4e37a6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S @@ -0,0 +1 @@ +/* __lroundf is in s_llroundf.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S new file mode 100644 index 0000000000..3dcd04b1f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S @@ -0,0 +1,75 @@ +/* Round to int floating-point values. PowerPC64 version. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + +/* double [fp1] nearbyint(double [fp1] x) */ + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__nearbyint, 4, 0) + CALL_MCOUNT 0 + fabs fp0,fp1 + lfd fp13,.LC0@toc(2) + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + bge cr7,.L10 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp1,fp12 /* if (x > 0.0) */ + ble cr7, L(lessthanzero) + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fadd fp1,fp1,fp13 /* x+= TWO52 */ + fsub fp1,fp1,fp13 /* x-= TWO52 */ + fabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr /* x = 0.0; */ +L(lessthanzero): + bgelr cr7 /* if (x < 0.0) */ + mffs fp11 + mtfsb0 4*cr7+lt + fsub fp1,fp1,fp13 /* x -= TWO52 */ + fadd fp1,fp1,fp13 /* x += TWO52 */ + fnabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr +END (__nearbyint) + +weak_alias (__nearbyint, nearbyint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__nearbyint, nearbyint) +strong_alias (__nearbyint, __nearbyintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S new file mode 100644 index 0000000000..11be35f94e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S @@ -0,0 +1,68 @@ +/* Round to int floating-point values. PowerPC64 version. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> + + +/* float [fp1] nearbyintf(float [fp1]) */ + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +EALIGN (__nearbyintf, 4, 0) + CALL_MCOUNT 0 + fabs fp0,fp1 + lfs fp13,.LC0@toc(2) + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + bge cr7,.L10 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp1,fp12 /* if (x > 0.0) */ + ble cr7, L(lessthanzero) + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fadds fp1,fp1,fp13 /* x += TWO23 */ + fsubs fp1,fp1,fp13 /* x -= TWO23 */ + fabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr /* x = 0.0; */ +L(lessthanzero): + bgelr cr7 /* if (x < 0.0) */ + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fsubs fp1,fp1,fp13 /* x -= TWO23 */ + fadds fp1,fp1,fp13 /* x += TWO23 */ + fnabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr +END (__nearbyintf) + +weak_alias (__nearbyintf, nearbyintf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rint.S new file mode 100644 index 0000000000..7ba0adff84 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rint.S @@ -0,0 +1,65 @@ +/* Round to int floating-point values. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__rint, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl cr7,.L10 + bng cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = 0.0; */ +.L4: + bnllr cr6 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__rint) + +weak_alias (__rint, rint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__rint, rintl) +strong_alias (__rint, __rintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __rint, rintl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rintf.S new file mode 100644 index 0000000000..b1d1e158c0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rintf.S @@ -0,0 +1,56 @@ +/* Round float to int floating-point values. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +EALIGN (__rintf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl cr7,.L10 + bng cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = 0.0; */ +.L4: + bnllr cr6 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__rintf) + +weak_alias (__rintf, rintf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_round.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_round.S new file mode 100644 index 0000000000..fe315af51d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_round.S @@ -0,0 +1,87 @@ +/* round function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 +.LC1: /* 0.5 */ + .tc FD_3fe00000_0[TC],0x3fe0000000000000 + .section ".text" + +/* double [fp1] round (double x [fp1]) + IEEE 1003.1 round function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "Round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "Round to Nearest" mode. Instead we set + "Round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. */ + +EALIGN (__round, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding mode toward 0. */ + lfd fp10,.LC1@toc(2) + ble- cr6,.L4 + fadd fp1,fp1,fp10 /* x+= 0.5; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + fsub fp9,fp1,fp10 /* x+= 0.5; */ + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp9,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__round) + +weak_alias (__round, round) + +#ifdef NO_LONG_DOUBLE +weak_alias (__round, roundl) +strong_alias (__round, __roundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __round, roundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_roundf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_roundf.S new file mode 100644 index 0000000000..d213f43566 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_roundf.S @@ -0,0 +1,81 @@ +/* roundf function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 +.LC1: /* 0.5 */ + .long 0x3f000000 + + .section ".text" + +/* float [fp1] roundf (float x [fp1]) + IEEE 1003.1 round function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "Round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "Round to Nearest" mode. Instead we set + "Round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. */ + +EALIGN (__roundf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding mode toward 0. */ + lfs fp10,.LC1@toc(2) + ble- cr6,.L4 + fadds fp1,fp1,fp10 /* x+= 0.5; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + fsubs fp9,fp1,fp10 /* x+= 0.5; */ + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp9,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__roundf) + +weak_alias (__roundf, roundf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_trunc.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_trunc.S new file mode 100644 index 0000000000..890eb21c54 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_trunc.S @@ -0,0 +1,79 @@ +/* trunc function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +/* double [fp1] trunc (double x [fp1]) + IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer + value, in floating format, nearest to but no larger in magnitude + then the argument." + We set "round toward Zero" mode and trunc by adding +-2**52 then + subtracting +-2**52. */ + +EALIGN (__trunc, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding toward 0 mode. */ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__trunc) + +weak_alias (__trunc, trunc) + +#ifdef NO_LONG_DOUBLE +weak_alias (__trunc, truncl) +strong_alias (__trunc, __truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_truncf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_truncf.S new file mode 100644 index 0000000000..cfcff80bf7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_truncf.S @@ -0,0 +1,73 @@ +/* truncf function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +/* float [fp1] truncf (float x [fp1]) + IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer + value, in floating format, nearest to but no larger in magnitude + then the argument." + We set "round toward Zero" mode and trunc by adding +-2**23 then + subtracting +-2**23. */ + +EALIGN (__truncf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding toward 0 mode. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__truncf) + +weak_alias (__truncf, truncf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/hp-timing.h b/REORG.TODO/sysdeps/powerpc/powerpc64/hp-timing.h new file mode 100644 index 0000000000..33064c6781 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/hp-timing.h @@ -0,0 +1,46 @@ +/* High precision, low overhead timing functions. powerpc64 version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _HP_TIMING_H +#define _HP_TIMING_H 1 + +/* We always assume having the timestamp register. */ +#define HP_TIMING_AVAIL (1) +#define HP_SMALL_TIMING_AVAIL (1) + +/* We indeed have inlined functions. */ +#define HP_TIMING_INLINE (1) + +/* We use 64bit values for the times. */ +typedef unsigned long long int hp_timing_t; + +/* That's quite simple. Use the `mftb' instruction. Note that the value + might not be 100% accurate since there might be some more instructions + running in this moment. This could be changed by using a barrier like + 'lwsync' right before the `mftb' instruction. But we are not interested + in accurate clock cycles here so we don't do this. */ +#ifdef _ARCH_PWR4 +#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("mfspr %0,268" : "=r" (Var)) +#else +#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("mftb %0" : "=r" (Var)) +#endif + +#include <hp-timing-common.h> + +#endif /* hp-timing.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/lshift.S b/REORG.TODO/sysdeps/powerpc/powerpc64/lshift.S new file mode 100644 index 0000000000..480e38688b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/lshift.S @@ -0,0 +1,177 @@ +/* PowerPC64 mpn_lshift -- rp[] = up[] << cnt + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define RP r3 +#define UP r4 +#define N r5 +#define CNT r6 + +#define TNC r0 +#define U0 r30 +#define U1 r31 +#define RETVAL r5 + +EALIGN(__mpn_lshift, 5, 0) + std U1, -8(r1) + std U0, -16(r1) + subfic TNC, CNT, 64 + sldi r7, N, RP + add UP, UP, r7 + add RP, RP, r7 + rldicl. U0, N, 0, 62 + cmpdi CNT, U0, 2 + addi U1, N, RP + ld r10, -8(UP) + srd RETVAL, r10, TNC + + srdi U1, U1, 2 + mtctr U1 + beq cr0, L(b00) + blt cr6, L(b01) + ld r11, -16(UP) + beq cr6, L(b10) + + .align 4 +L(b11): sld r8, r10, CNT + srd r9, r11, TNC + ld U1, -24(UP) + addi UP, UP, -24 + sld r12, r11, CNT + srd r7, U1, TNC + addi RP, RP, 16 + bdnz L(gt3) + + or r11, r8, r9 + sld r8, U1, CNT + b L(cj3) + + .align 4 +L(gt3): ld U0, -8(UP) + or r11, r8, r9 + sld r8, U1, CNT + srd r9, U0, TNC + ld U1, -16(UP) + or r10, r12, r7 + b L(L11) + + .align 5 +L(b10): sld r12, r10, CNT + addi RP, RP, 24 + srd r7, r11, TNC + bdnz L(gt2) + + sld r8, r11, CNT + or r10, r12, r7 + b L(cj2) + +L(gt2): ld U0, -24(UP) + sld r8, r11, CNT + srd r9, U0, TNC + ld U1, -32(UP) + or r10, r12, r7 + sld r12, U0, CNT + srd r7, U1, 0 + ld U0, -40(UP) + or r11, r8, r9 + addi UP, UP, -16 + b L(L10) + + .align 4 +L(b00): ld U1, -16(UP) + sld r12, r10, CNT + srd r7, U1, TNC + ld U0, -24(UP) + sld r8, U1, CNT + srd r9, U0, TNC + ld U1, -32(UP) + or r10, r12, r7 + sld r12, U0, CNT + srd r7, U1, TNC + addi RP, RP, r8 + bdz L(cj4) + +L(gt4): addi UP, UP, -32 + ld U0, -8(UP) + or r11, r8, r9 + b L(L00) + + .align 4 +L(b01): bdnz L(gt1) + sld r8, r10, CNT + std r8, -8(RP) + b L(ret) + +L(gt1): ld U0, -16(UP) + sld r8, r10, CNT + srd r9, U0, TNC + ld U1, -24(UP) + sld r12, U0, CNT + srd r7, U1, TNC + ld U0, -32(UP) + or r11, r8, r9 + sld r8, U1, CNT + srd r9, U0, TNC + ld U1, -40(UP) + addi UP, UP, -40 + or r10, r12, r7 + bdz L(end) + + .align 5 +L(top): sld r12, U0, CNT + srd r7, U1, TNC + ld U0, -8(UP) + std r11, -8(RP) + or r11, r8, r9 +L(L00): sld r8, U1, CNT + srd r9, U0, TNC + ld U1, -16(UP) + std r10, -16(RP) + or r10, r12, r7 +L(L11): sld r12, U0, CNT + srd r7, U1, TNC + ld U0, -24(UP) + std r11, -24(RP) + or r11, r8, r9 +L(L10): sld r8, U1, CNT + srd r9, U0, TNC + ld U1, -32(UP) + addi UP, UP, -32 + std r10, -32(RP) + addi RP, RP, -32 + or r10, r12, r7 + bdnz L(top) + + .align 5 +L(end): sld r12, U0, CNT + srd r7, U1, TNC + std r11, -8(RP) +L(cj4): or r11, r8, r9 + sld r8, U1, CNT + std r10, -16(RP) +L(cj3): or r10, r12, r7 + std r11, -24(RP) +L(cj2): std r10, -32(RP) + std r8, -40(RP) + +L(ret): ld U1, -8(r1) + ld U0, -16(r1) + mr RP, RETVAL + blr +END(__mpn_lshift) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/memcpy.S new file mode 100644 index 0000000000..a4c82c31ef --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/memcpy.S @@ -0,0 +1,397 @@ +/* Optimized memcpy implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + + Memcpy handles short copies (< 32-bytes) using a binary move blocks + (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled + with the appropriate combination of byte and halfword load/stores. + There is minimal effort to optimize the alignment of short moves. + The 64-bit implementations of POWER3 and POWER4 do a reasonable job + of handling unaligned load/stores that do not cross 32-byte boundaries. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination doubleword (8-byte) aligned. Further optimization is + possible when both source and destination are doubleword aligned. + Each case has a optimized unrolled loop. */ + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + cmpldi cr1,5,31 + neg 0,3 + std 3,-16(1) + std 31,-8(1) + cfi_offset(31,-8) + andi. 11,3,7 /* check alignment of dst. */ + clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ + clrldi 10,4,61 /* check alignment of src. */ + cmpldi cr6,5,8 + ble- cr1,.L2 /* If move < 32 bytes use short move code. */ + cmpld cr6,10,11 + mr 12,4 + srdi 9,5,3 /* Number of full double words remaining. */ + mtcrf 0x01,0 + mr 31,5 + beq .L0 + + subf 31,0,5 + /* Move 0-7 bytes as needed to get the destination doubleword aligned. */ +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,4f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: bf 29,0f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +0: + clrldi 10,12,61 /* check alignment of src again. */ + srdi 9,31,3 /* Number of full double words remaining. */ + + /* Copy doublewords from source to destination, assuming the + destination is aligned on a doubleword boundary. + + At this point we know there are at least 25 bytes left (32-7) to copy. + The next step is to determine if the source is also doubleword aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. + + Otherwise source and destination are doubleword aligned, and we can + the optimized doubleword copy loop. */ +.L0: + clrldi 11,31,61 + mtcrf 0x01,9 + bne- cr6,.L6 /* If source is not DW aligned. */ + + /* Move doublewords where destination and source are DW aligned. + Use a unrolled loop to copy 4 doubleword (32-bytes) per iteration. + If the copy is not an exact multiple of 32 bytes, 1-3 + doublewords are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-7 bytes. These byte are + copied a word/halfword/byte at a time as needed to preserve alignment. */ + + srdi 8,31,5 + cmpldi cr1,9,4 + cmpldi cr6,11,0 + mr 11,12 + + bf 30,1f + ld 6,0(12) + ld 7,8(12) + addi 11,12,16 + mtctr 8 + std 6,0(3) + std 7,8(3) + addi 10,3,16 + bf 31,4f + ld 0,16(12) + std 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 + b 4f + .align 4 +1: + mr 10,3 + mtctr 8 + bf 31,4f + ld 6,0(12) + addi 11,12,8 + std 6,0(3) + addi 10,3,8 + + .align 4 +4: + ld 6,0(11) + ld 7,8(11) + ld 8,16(11) + ld 0,24(11) + addi 11,11,32 +2: + std 6,0(10) + std 7,8(10) + std 8,16(10) + std 0,24(10) + addi 10,10,32 + bdnz 4b +3: + + rldicr 0,31,0,60 + mtcrf 0x01,31 + beq cr6,0f +.L9: + add 3,3,0 + add 12,12,0 + +/* At this point we have a tail of 0-7 bytes and we know that the + destination is double word aligned. */ +4: bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: bf 30,1f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 31,-8(1) + ld 3,-16(1) + blr + +/* Copy up to 31 bytes. This divided into two cases 0-8 bytes and 9-31 + bytes. Each case is handled without loops, using binary (1,2,4,8) + tests. + + In the short (0-8 byte) case no attempt is made to force alignment + of either source or destination. The hardware will handle the + unaligned load/stores with small delays for crossing 32- 64-byte, and + 4096-byte boundaries. Since these short moves are unlikely to be + unaligned or cross these boundaries, the overhead to force + alignment is not justified. + + The longer (9-31 byte) move is more likely to cross 32- or 64-byte + boundaries. Since only loads are sensitive to the 32-/64-byte + boundaries it is more important to align the source then the + destination. If the source is not already word aligned, we first + move 1-3 bytes as needed. Since we are only word aligned we don't + use double word load/stores to insure that all loads are aligned. + While the destination and stores may still be unaligned, this + is only an issue for page (4096 byte boundary) crossing, which + should be rare for these short moves. The hardware handles this + case automatically with a small delay. */ + + .align 4 +.L2: + mtcrf 0x01,5 + neg 8,4 + clrrdi 11,4,2 + andi. 0,8,3 + ble cr6,.LE8 /* Handle moves of 0-8 bytes. */ +/* At least 9 bytes left. Get the source word aligned. */ + cmpldi cr1,5,16 + mr 10,5 + mr 12,4 + cmpldi cr6,0,2 + beq .L3 /* If the source is already word aligned skip this. */ +/* Copy 1-3 bytes to get source address word aligned. */ + lwz 6,0(11) + subf 10,0,5 + add 12,4,0 + blt cr6,5f + srdi 7,6,16 + bgt cr6,3f +#ifdef __LITTLE_ENDIAN__ + sth 7,0(3) +#else + sth 6,0(3) +#endif + b 7f + .align 4 +3: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,24 + stb 6,0(3) + sth 7,1(3) +#else + stb 7,0(3) + sth 6,1(3) +#endif + b 7f + .align 4 +5: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,8 +#endif + stb 6,0(3) +7: + cmpldi cr1,10,16 + add 3,3,0 + mtcrf 0x01,10 + .align 4 +.L3: +/* At least 6 bytes left and the source is word aligned. */ + blt cr1,8f +16: /* Move 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 6,8(12) + stw 7,4(3) + lwz 7,12(12) + addi 12,12,16 + stw 6,8(3) + stw 7,12(3) + addi 3,3,16 +8: /* Move 8 bytes. */ + bf 28,4f + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Move 4 bytes. */ + bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Move 2-3 bytes. */ + bf 30,1f + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + ld 3,-16(1) + blr +1: /* Move 1 byte. */ + bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 3,-16(1) + blr + +/* Special case to copy 0-8 bytes. */ + .align 4 +.LE8: + mr 12,4 + bne cr6,4f +/* Would have liked to use use ld/std here but the 630 processors are + slow for load/store doubles that are not at least word aligned. + Unaligned Load/Store word execute with only a 1 cycle penalty. */ + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + /* Return original dst pointer. */ + ld 3,-16(1) + blr + .align 4 +4: bf 29,2b + lwz 6,0(4) + stw 6,0(3) +6: + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + ld 3,-16(1) + blr + .align 4 +5: + bf 31,0f + lbz 6,4(4) + stb 6,4(3) + .align 4 +0: + /* Return original dst pointer. */ + ld 3,-16(1) + blr + + .align 4 +.L6: + + /* Copy doublewords where the destination is aligned but the source is + not. Use aligned doubleword loads from the source, shifted to realign + the data, to allow aligned destination stores. */ + subf 5,10,12 + andi. 0,9,1 + cmpldi cr6,11,0 + sldi 10,10,3 + mr 11,9 + mr 4,3 + ld 6,0(5) + ld 7,8(5) + subfic 9,10,64 + beq 2f +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 +#else + sld 0,6,10 +#endif + cmpldi 11,1 + mr 6,7 + addi 4,4,-8 + addi 11,11,-1 + b 1f +2: addi 5,5,8 + .align 4 +#ifdef __LITTLE_ENDIAN__ +0: srd 0,6,10 + sld 8,7,9 +#else +0: sld 0,6,10 + srd 8,7,9 +#endif + cmpldi 11,2 + ld 6,8(5) + or 0,0,8 + addi 11,11,-2 + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srd 0,7,10 +1: sld 8,6,9 +#else + sld 0,7,10 +1: srd 8,6,9 +#endif + or 0,0,8 + beq 8f + ld 7,16(5) + std 0,8(4) + addi 5,5,16 + addi 4,4,16 + b 0b + .align 4 +8: + std 0,8(4) + rldicr 0,31,0,60 + mtcrf 0x01,31 + bne cr6,.L9 /* If the tail is 0 bytes we are done! */ + /* Return original dst pointer. */ + ld 31,-8(1) + ld 3,-16(1) + blr +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/memset.S new file mode 100644 index 0000000000..f6581b50f7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/memset.S @@ -0,0 +1,265 @@ +/* Optimized memset implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" +.LC0: + .tc __cache_line_size[TC],__cache_line_size + .section ".text" + .align 2 + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. + + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (256 bits). There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + +EALIGN (MEMSET, 5, 0) + CALL_MCOUNT 3 + +#define rTMP r0 +#define rRTN r3 /* Initial value of 1st argument. */ +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ +#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ +#define rMEMP2 r8 + +#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */ +#define rCLS r8 /* Cache line size obtained from static. */ +#define rCLM r9 /* Cache line size mask to check for cache alignment. */ +L(_memset): +/* Take care of case for size <= 4. */ + cmpldi cr1, rLEN, 8 + andi. rALIGN, rMEMP0, 7 + mr rMEMP, rMEMP0 + ble- cr1, L(small) + +/* Align to doubleword boundary. */ + cmpldi cr5, rLEN, 31 + insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + beq+ L(aligned2) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 8 + cror 28,30,31 /* Detect odd word aligned. */ + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + bt 29, L(g4) +/* Process the even word of doubleword. */ + bf+ 31, L(g2) + stb rCHR, 0(rMEMP0) + bt 30, L(g4x) +L(g2): + sth rCHR, -6(rMEMP) +L(g4x): + stw rCHR, -4(rMEMP) + b L(aligned) +/* Process the odd word of doubleword. */ +L(g4): + bf 28, L(g4x) /* If false, word aligned on odd word. */ + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): + sth rCHR, -2(rMEMP) + +/* Handle the case of size < 31. */ +L(aligned2): + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ +L(aligned): + mtcrf 0x01, rLEN + ble cr5, L(medium) +/* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x18 + subfic rALIGN, rALIGN, 0x20 + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stdu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + std rCHR, -8(rMEMP2) + stdu rCHR, -16(rMEMP2) +L(a2): + +/* Now aligned to a 32 byte boundary. */ +L(caligned): + cmpldi cr1, rCHR, 0 + clrrdi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN + beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */ +L(nondcbz): + srdi rTMP, rALIGN, 5 + mtctr rTMP + beq L(medium) /* We may not actually get to do a full line. */ + clrldi. rLEN, rLEN, 59 + add rMEMP, rMEMP, rALIGN + li rNEG64, -0x40 + bdz L(cloopdone) + +L(c3): dcbtst rNEG64, rMEMP + std rCHR, -8(rMEMP) + std rCHR, -16(rMEMP) + std rCHR, -24(rMEMP) + stdu rCHR, -32(rMEMP) + bdnz L(c3) +L(cloopdone): + std rCHR, -8(rMEMP) + std rCHR, -16(rMEMP) + cmpldi cr1, rLEN, 16 + std rCHR, -24(rMEMP) + stdu rCHR, -32(rMEMP) + beqlr + add rMEMP, rMEMP, rALIGN + b L(medium_tail2) + + .align 5 +/* Clear lines of memory in 128-byte chunks. */ +L(zloopstart): +/* If the remaining length is less the 32 bytes, don't bother getting + the cache line size. */ + beq L(medium) + ld rCLS,.LC0@toc(r2) + lwz rCLS,0(rCLS) +/* If the cache line size was not set just goto to L(nondcbz) which is + safe for any cache line size. */ + cmpldi cr1,rCLS,0 + beq cr1,L(nondcbz) + + +/* Now we know the cache line size, and it is not 32-bytes, but + we may not yet be aligned to the cache line. May have a partial + line to fill, so touch it 1st. */ + dcbt 0,rMEMP + addi rCLM,rCLS,-1 +L(getCacheAligned): + cmpldi cr1,rLEN,32 + and. rTMP,rCLM,rMEMP + blt cr1,L(handletail32) + beq L(cacheAligned) + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + std rCHR,-32(rMEMP) + std rCHR,-24(rMEMP) + std rCHR,-16(rMEMP) + std rCHR,-8(rMEMP) + b L(getCacheAligned) + +/* Now we are aligned to the cache line and can use dcbz. */ +L(cacheAligned): + cmpld cr1,rLEN,rCLS + blt cr1,L(handletail32) + dcbz 0,rMEMP + subf rLEN,rCLS,rLEN + add rMEMP,rMEMP,rCLS + b L(cacheAligned) + +/* We are here because the cache line size was set and was not 32-bytes + and the remainder (rLEN) is less than the actual cache line size. + So set up the preconditions for L(nondcbz) and go there. */ +L(handletail32): + clrrwi. rALIGN, rLEN, 5 + b L(nondcbz) + + .align 5 +L(small): +/* Memset of 8 bytes or less. */ + cmpldi cr6, rLEN, 4 + cmpldi cr5, rLEN, 1 + ble cr6,L(le4) + subi rLEN, rLEN, 4 + stb rCHR,0(rMEMP) + stb rCHR,1(rMEMP) + stb rCHR,2(rMEMP) + stb rCHR,3(rMEMP) + addi rMEMP,rMEMP, 4 + cmpldi cr5, rLEN, 1 +L(le4): + cmpldi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + cmpldi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt- 29, L(medium_29t) +L(medium_29f): + bge- cr1, L(medium_27t) + bflr- 28 + std rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt- cr1, L(medium_27f) +L(medium_27t): + std rCHR, -8(rMEMP) + stdu rCHR, -16(rMEMP) +L(medium_27f): + bflr- 28 +L(medium_28t): + std rCHR, -8(rMEMP) + blr +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +#ifndef NO_BZERO_IMPL +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. */ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END_GEN_TB (__bzero,TB_TOCLESS) + +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/mul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc64/mul_1.S new file mode 100644 index 0000000000..68e85cbdc8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/mul_1.S @@ -0,0 +1,135 @@ +/* PowerPC64 __mpn_mul_1 -- Multiply a limb vector with a limb and store + the result in a second limb vector. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define RP r3 +#define UP r4 +#define N r5 +#define VL r6 + +EALIGN(__mpn_mul_1, 5, 0) + std r27, -40(r1) + std r26, -48(r1) + li r12, 0 + ld r26, 0(UP) + + rldicl. r0, N, 0, 62 + cmpdi VL, r0, 2 + addic N, N, RP + srdi N, N, 2 + mtctr N + beq cr0, L(b00) + blt cr6, L(b01) + beq cr6, L(b10) + +L(b11): mr cr7, r12 + mulld cr0, r26, VL + mulhdu r12, r26, VL + addi UP, UP, 8 + addc r0, r0, r7 + std r0, 0(RP) + addi RP, RP, 8 + b L(fic) + +L(b00): ld r27, r8(UP) + addi UP, UP, 16 + mulld r0, r26, VL + mulhdu N, r26, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + addc r0, r0, r12 + adde r7, r7, N + addze r12, r8 + std r0, 0(RP) + std r7, 8(RP) + addi RP, RP, 16 + b L(fic) + + nop +L(b01): bdnz L(gt1) + mulld r0, r26, VL + mulhdu r8, r26, VL + addc r0, r0, r12 + std r0, 0(RP) + b L(ret) +L(gt1): ld r27, 8(UP) + nop + mulld r0, r26, VL + mulhdu N, r26, VL + ld r26, 16(UP) + mulld r7, r27, VL + mulhdu r8, r27, VL + mulld r9, r26, VL + mulhdu r10, r26, VL + addc r0, r0, r12 + adde r7, r7, N + adde r9, r9, r8 + addze r12, r10 + std r0, 0(RP) + std r7, 8(RP) + std r9, 16(RP) + addi UP, UP, 24 + addi RP, RP, 24 + b L(fic) + + nop +L(fic): ld r26, 0(UP) +L(b10): ld r27, 8(UP) + addi UP, UP, 16 + bdz L(end) + +L(top): mulld r0, r26, VL + mulhdu N, r26, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + ld r26, 0(UP) + ld r27, 8(UP) + adde r0, r0, r12 + adde r7, r7, N + mulld r9, r26, VL + mulhdu r10, r26, VL + mulld r11, r27, VL + mulhdu r12, r27, VL + ld r26, 16(UP) + ld r27, 24(UP) + std r0, 0(RP) + adde r9, r9, r8 + std r7, 8(RP) + adde r11, r11, r10 + std r9, 16(RP) + addi UP, UP, 32 + std r11, 24(RP) + + addi RP, RP, 32 + bdnz L(top) + +L(end): mulld r0, r26, VL + mulhdu N, r26, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + adde r0, r0, r12 + adde r7, r7, N + std r0, 0(RP) + std r7, 8(RP) +L(ret): addze RP, r8 + ld r27, -40(r1) + ld r26, -48(r1) + blr +END(__mpn_mul_1) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/Makefile new file mode 100644 index 0000000000..5da9052993 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -0,0 +1,47 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + memcpy-power4 memcpy-ppc64 \ + memcmp-power8 memcmp-power7 memcmp-power4 memcmp-ppc64 \ + memset-power7 memset-power6 memset-power4 \ + memset-ppc64 memset-power8 \ + mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \ + memrchr-power7 memrchr-ppc64 rawmemchr-power7 \ + rawmemchr-ppc64 strlen-power7 strlen-ppc64 \ + strnlen-power8 strnlen-power7 strnlen-ppc64 \ + strcasecmp-power7 strcasecmp_l-power7 \ + strncase-power7 strncase_l-power7 \ + strncmp-power9 strncmp-power8 strncmp-power7 \ + strncmp-power4 strncmp-ppc64 \ + strchr-power8 strchr-power7 strchr-ppc64 \ + strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \ + strcpy-power8 strcpy-power7 strcpy-ppc64 stpcpy-power8 \ + stpcpy-power7 stpcpy-ppc64 \ + strrchr-power8 strrchr-power7 strrchr-ppc64 \ + strncat-power8 strncat-power7 strncat-ppc64 \ + strncpy-power7 strncpy-ppc64 \ + stpncpy-power8 stpncpy-power7 stpncpy-ppc64 \ + strcmp-power9 strcmp-power8 strcmp-power7 strcmp-ppc64 \ + strcat-power8 strcat-power7 strcat-ppc64 \ + memmove-power7 memmove-ppc64 wordcopy-ppc64 bcopy-ppc64 \ + strncpy-power8 strstr-power7 strstr-ppc64 \ + strspn-power8 strspn-ppc64 strcspn-power8 strcspn-ppc64 \ + strlen-power8 strcasestr-power8 strcasestr-ppc64 \ + strcasecmp-ppc64 strcasecmp-power8 strncase-ppc64 \ + strncase-power8 + +CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops +CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops +endif + +ifeq ($(subdir),wcsmbs) +sysdep_routines += wcschr-power7 wcschr-power6 wcschr-ppc64 \ + wcsrchr-power7 wcsrchr-power6 wcsrchr-ppc64 \ + wcscpy-power7 wcscpy-power6 wcscpy-ppc64 \ + +CFLAGS-wcschr-power7.c += -mcpu=power7 +CFLAGS-wcschr-power6.c += -mcpu=power6 +CFLAGS-wcsrchr-power7.c += -mcpu=power7 +CFLAGS-wcsrchr-power6.c += -mcpu=power6 +CFLAGS-wcscpy-power7.c += -mcpu=power7 +CFLAGS-wcscpy-power6.c += -mcpu=power6 +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy-ppc64.c new file mode 100644 index 0000000000..a8a097a614 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy-ppc64.c @@ -0,0 +1,27 @@ +/* PowerPC64 default bcopy. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (bcopy) __bcopy_ppc attribute_hidden; +extern __typeof (memmove) __memmove_ppc attribute_hidden; + +void __bcopy_ppc (const void *src, void *dest, size_t n) +{ + __memmove_ppc (dest, src, n); +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy.c new file mode 100644 index 0000000000..05d46e2b48 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy.c @@ -0,0 +1,29 @@ +/* PowerPC64 multiarch bcopy. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include "init-arch.h" + +extern __typeof (bcopy) __bcopy_ppc attribute_hidden; +/* __bcopy_power7 symbol is implemented at memmove-power7.S */ +extern __typeof (bcopy) __bcopy_power7 attribute_hidden; + +libc_ifunc (bcopy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __bcopy_power7 + : __bcopy_ppc); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bzero.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bzero.c new file mode 100644 index 0000000000..83b224b8d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bzero.c @@ -0,0 +1,43 @@ +/* Multiple versions of bzero. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# include <string.h> +# include <strings.h> +# include "init-arch.h" + +extern __typeof (bzero) __bzero_ppc attribute_hidden; +extern __typeof (bzero) __bzero_power4 attribute_hidden; +extern __typeof (bzero) __bzero_power6 attribute_hidden; +extern __typeof (bzero) __bzero_power7 attribute_hidden; +extern __typeof (bzero) __bzero_power8 attribute_hidden; + +libc_ifunc (__bzero, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __bzero_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __bzero_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __bzero_power6 : + (hwcap & PPC_FEATURE_POWER4) + ? __bzero_power4 + : __bzero_ppc); + +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c new file mode 100644 index 0000000000..eb173f8b05 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -0,0 +1,389 @@ +/* Enumerate available IFUNC implementations of a function. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <ldsodefs.h> +#include <ifunc-impl-list.h> + +/* Maximum number of IFUNC implementations. */ +#define MAX_IFUNC 6 + +size_t +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + size_t max) +{ + assert (max >= MAX_IFUNC); + + size_t i = 0; + + unsigned long int hwcap = GLRO(dl_hwcap); + unsigned long int hwcap2 = GLRO(dl_hwcap2); + + /* hwcap contains only the latest supported ISA, the code checks which is + and fills the previous supported ones. */ + if (hwcap & PPC_FEATURE_ARCH_2_06) + hwcap |= PPC_FEATURE_ARCH_2_05 | PPC_FEATURE_POWER5_PLUS | + PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_ARCH_2_05) + hwcap |= PPC_FEATURE_POWER5_PLUS | PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_POWER5_PLUS) + hwcap |= PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_POWER5) + hwcap |= PPC_FEATURE_POWER4; + +#ifdef SHARED + /* Support sysdeps/powerpc/powerpc64/multiarch/memcpy.c. */ + IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_HAS_VSX, + __memcpy_power7) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_06, + __memcpy_a2) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_05, + __memcpy_power6) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_CELL_BE, + __memcpy_cell) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_POWER4, + __memcpy_power4) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/memmove.c. */ + IFUNC_IMPL (i, name, memmove, + IFUNC_IMPL_ADD (array, i, memmove, hwcap & PPC_FEATURE_HAS_VSX, + __memmove_power7) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/memset.c. */ + IFUNC_IMPL (i, name, memset, + IFUNC_IMPL_ADD (array, i, memset, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __memset_power8) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_HAS_VSX, + __memset_power7) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_05, + __memset_power6) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_POWER4, + __memset_power4) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcpy.c. */ + IFUNC_IMPL (i, name, strcpy, + IFUNC_IMPL_ADD (array, i, strcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcpy_power8) + IFUNC_IMPL_ADD (array, i, strcpy, hwcap & PPC_FEATURE_HAS_VSX, + __strcpy_power7) + IFUNC_IMPL_ADD (array, i, strcpy, 1, + __strcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/stpcpy.c. */ + IFUNC_IMPL (i, name, stpcpy, + IFUNC_IMPL_ADD (array, i, stpcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __stpcpy_power8) + IFUNC_IMPL_ADD (array, i, stpcpy, hwcap & PPC_FEATURE_HAS_VSX, + __stpcpy_power7) + IFUNC_IMPL_ADD (array, i, stpcpy, 1, + __stpcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strlen.c. */ + IFUNC_IMPL (i, name, strlen, + IFUNC_IMPL_ADD (array, i, strlen, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strlen_power8) + IFUNC_IMPL_ADD (array, i, strlen, hwcap & PPC_FEATURE_HAS_VSX, + __strlen_power7) + IFUNC_IMPL_ADD (array, i, strlen, 1, + __strlen_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c. */ + IFUNC_IMPL (i, name, strncmp, + IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00, + __strncmp_power9) + IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncmp_power8) + IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_HAS_VSX, + __strncmp_power7) + IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_POWER4, + __strncmp_power4) + IFUNC_IMPL_ADD (array, i, strncmp, 1, + __strncmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strchr.c. */ + IFUNC_IMPL (i, name, strchr, + IFUNC_IMPL_ADD (array, i, strchr, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strchr_power8) + IFUNC_IMPL_ADD (array, i, strchr, + hwcap & PPC_FEATURE_HAS_VSX, + __strchr_power7) + IFUNC_IMPL_ADD (array, i, strchr, 1, + __strchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strchrnul.c. */ + IFUNC_IMPL (i, name, strchrnul, + IFUNC_IMPL_ADD (array, i, strchrnul, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strchrnul_power8) + IFUNC_IMPL_ADD (array, i, strchrnul, + hwcap & PPC_FEATURE_HAS_VSX, + __strchrnul_power7) + IFUNC_IMPL_ADD (array, i, strchrnul, 1, + __strchrnul_ppc)) +#endif + + /* Support sysdeps/powerpc/powerpc64/multiarch/memcmp.c. */ + IFUNC_IMPL (i, name, memcmp, + IFUNC_IMPL_ADD (array, i, memcmp, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __memcmp_power8) + IFUNC_IMPL_ADD (array, i, memcmp, hwcap & PPC_FEATURE_HAS_VSX, + __memcmp_power7) + IFUNC_IMPL_ADD (array, i, memcmp, hwcap & PPC_FEATURE_POWER4, + __memcmp_power4) + IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/bzero.c. */ + IFUNC_IMPL (i, name, bzero, + IFUNC_IMPL_ADD (array, i, bzero, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __bzero_power8) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_HAS_VSX, + __bzero_power7) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_ARCH_2_05, + __bzero_power6) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_POWER4, + __bzero_power4) + IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/bcopy.c. */ + IFUNC_IMPL (i, name, bcopy, + IFUNC_IMPL_ADD (array, i, bcopy, hwcap & PPC_FEATURE_HAS_VSX, + __bcopy_power7) + IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/mempcpy.c. */ + IFUNC_IMPL (i, name, mempcpy, + IFUNC_IMPL_ADD (array, i, mempcpy, + hwcap & PPC_FEATURE_HAS_VSX, + __mempcpy_power7) + IFUNC_IMPL_ADD (array, i, mempcpy, 1, + __mempcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/memchr.c. */ + IFUNC_IMPL (i, name, memchr, + IFUNC_IMPL_ADD (array, i, memchr, + hwcap & PPC_FEATURE_HAS_VSX, + __memchr_power7) + IFUNC_IMPL_ADD (array, i, memchr, 1, + __memchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/memrchr.c. */ + IFUNC_IMPL (i, name, memrchr, + IFUNC_IMPL_ADD (array, i, memrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __memrchr_power7) + IFUNC_IMPL_ADD (array, i, memrchr, 1, + __memrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c. */ + IFUNC_IMPL (i, name, rawmemchr, + IFUNC_IMPL_ADD (array, i, rawmemchr, + hwcap & PPC_FEATURE_HAS_VSX, + __rawmemchr_power7) + IFUNC_IMPL_ADD (array, i, rawmemchr, 1, + __rawmemchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strnlen.c. */ + IFUNC_IMPL (i, name, strnlen, + IFUNC_IMPL_ADD (array, i, strnlen, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strnlen_power8) + IFUNC_IMPL_ADD (array, i, strnlen, hwcap & PPC_FEATURE_HAS_VSX, + __strnlen_power7) + IFUNC_IMPL_ADD (array, i, strnlen, 1, + __strnlen_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c. */ + IFUNC_IMPL (i, name, strcasecmp, + IFUNC_IMPL_ADD (array, i, strcasecmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcasecmp_power8) + IFUNC_IMPL_ADD (array, i, strcasecmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strcasecmp_power7) + IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c. */ + IFUNC_IMPL (i, name, strcasecmp_l, + IFUNC_IMPL_ADD (array, i, strcasecmp_l, + hwcap & PPC_FEATURE_HAS_VSX, + __strcasecmp_l_power7) + IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, + __strcasecmp_l_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncase.c. */ + IFUNC_IMPL (i, name, strncasecmp, + IFUNC_IMPL_ADD (array, i, strncasecmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncasecmp_power8) + IFUNC_IMPL_ADD (array, i, strncasecmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strncasecmp_power7) + IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncase_l.c. */ + IFUNC_IMPL (i, name, strncasecmp_l, + IFUNC_IMPL_ADD (array, i, strncasecmp_l, + hwcap & PPC_FEATURE_HAS_VSX, + __strncasecmp_l_power7) + IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, + __strncasecmp_l_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/wcschr.c. */ + IFUNC_IMPL (i, name, wcschr, + IFUNC_IMPL_ADD (array, i, wcschr, + hwcap & PPC_FEATURE_HAS_VSX, + __wcschr_power7) + IFUNC_IMPL_ADD (array, i, wcschr, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcschr_power6) + IFUNC_IMPL_ADD (array, i, wcschr, 1, + __wcschr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/wcschr.c. */ + IFUNC_IMPL (i, name, wcsrchr, + IFUNC_IMPL_ADD (array, i, wcsrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __wcsrchr_power7) + IFUNC_IMPL_ADD (array, i, wcsrchr, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcsrchr_power6) + IFUNC_IMPL_ADD (array, i, wcsrchr, 1, + __wcsrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/wcscpy.c. */ + IFUNC_IMPL (i, name, wcscpy, + IFUNC_IMPL_ADD (array, i, wcscpy, + hwcap & PPC_FEATURE_HAS_VSX, + __wcscpy_power7) + IFUNC_IMPL_ADD (array, i, wcscpy, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcscpy_power6) + IFUNC_IMPL_ADD (array, i, wcscpy, 1, + __wcscpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strrchr.c. */ + IFUNC_IMPL (i, name, strrchr, + IFUNC_IMPL_ADD (array, i, strrchr, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strrchr_power8) + IFUNC_IMPL_ADD (array, i, strrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __strrchr_power7) + IFUNC_IMPL_ADD (array, i, strrchr, 1, + __strrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncat.c. */ + IFUNC_IMPL (i, name, strncat, + IFUNC_IMPL_ADD (array, i, strncat, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncat_power8) + IFUNC_IMPL_ADD (array, i, strncat, + hwcap & PPC_FEATURE_HAS_VSX, + __strncat_power7) + IFUNC_IMPL_ADD (array, i, strncat, 1, + __strncat_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncpy.c. */ + IFUNC_IMPL (i, name, strncpy, + IFUNC_IMPL_ADD (array, i, strncpy, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncpy_power8) + IFUNC_IMPL_ADD (array, i, strncpy, + hwcap & PPC_FEATURE_HAS_VSX, + __strncpy_power7) + IFUNC_IMPL_ADD (array, i, strncpy, 1, + __strncpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/stpncpy.c. */ + IFUNC_IMPL (i, name, stpncpy, + IFUNC_IMPL_ADD (array, i, stpncpy, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __stpncpy_power8) + IFUNC_IMPL_ADD (array, i, stpncpy, + hwcap & PPC_FEATURE_HAS_VSX, + __stpncpy_power7) + IFUNC_IMPL_ADD (array, i, stpncpy, 1, + __stpncpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcmp.c. */ + IFUNC_IMPL (i, name, strcmp, + IFUNC_IMPL_ADD (array, i, strcmp, + hwcap2 & PPC_FEATURE2_ARCH_3_00, + __strcmp_power9) + IFUNC_IMPL_ADD (array, i, strcmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcmp_power8) + IFUNC_IMPL_ADD (array, i, strcmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strcmp_power7) + IFUNC_IMPL_ADD (array, i, strcmp, 1, + __strcmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcat.c. */ + IFUNC_IMPL (i, name, strcat, + IFUNC_IMPL_ADD (array, i, strcat, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcat_power8) + IFUNC_IMPL_ADD (array, i, strcat, + hwcap & PPC_FEATURE_HAS_VSX, + __strcat_power7) + IFUNC_IMPL_ADD (array, i, strcat, 1, + __strcat_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strspn.c. */ + IFUNC_IMPL (i, name, strspn, + IFUNC_IMPL_ADD (array, i, strspn, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strspn_power8) + IFUNC_IMPL_ADD (array, i, strspn, 1, + __strspn_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcspn.c. */ + IFUNC_IMPL (i, name, strcspn, + IFUNC_IMPL_ADD (array, i, strcspn, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcspn_power8) + IFUNC_IMPL_ADD (array, i, strcspn, 1, + __strcspn_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strstr.c. */ + IFUNC_IMPL (i, name, strstr, + IFUNC_IMPL_ADD (array, i, strstr, + hwcap & PPC_FEATURE_HAS_VSX, + __strstr_power7) + IFUNC_IMPL_ADD (array, i, strstr, 1, + __strstr_ppc)) + + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcasestr.c. */ + IFUNC_IMPL (i, name, strcasestr, + IFUNC_IMPL_ADD (array, i, strcasestr, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcasestr_power8) + IFUNC_IMPL_ADD (array, i, strcasestr, 1, + __strcasestr_ppc)) + + return i; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/init-arch.h b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/init-arch.h new file mode 100644 index 0000000000..dbbe83c67c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/init-arch.h @@ -0,0 +1,18 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-power7.S new file mode 100644 index 0000000000..fedca9c997 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-power7.S @@ -0,0 +1,28 @@ +/* Optimized memchr implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCHR __memchr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power7/memchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-ppc64.c new file mode 100644 index 0000000000..b67631f017 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-ppc64.c @@ -0,0 +1,31 @@ +/* PowerPC64 default implementation of memchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define MEMCHR __memchr_ppc + +#undef weak_alias +#define weak_alias(a, b) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) + +extern __typeof (memchr) __memchr_ppc attribute_hidden; + +#include <string/memchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr.c new file mode 100644 index 0000000000..f6f4babc09 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr.c @@ -0,0 +1,38 @@ +/* Multiple versions of memchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__memchr) __memchr_ppc attribute_hidden; +extern __typeof (__memchr) __memchr_power7 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__memchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memchr_power7 + : __memchr_ppc); + +weak_alias (__memchr, memchr) +libc_hidden_builtin_def (memchr) +#else +#include <string/memchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power4.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power4.S new file mode 100644 index 0000000000..e38b2a9c44 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power4.S @@ -0,0 +1,28 @@ +/* Optimized memcmp implementation for PowerPC64/POWER4. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCMP __memcmp_power4 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power4/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power7.S new file mode 100644 index 0000000000..a9cc979b92 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power7.S @@ -0,0 +1,28 @@ +/* Optimized memcmp implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCMP __memcmp_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power7/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power8.S new file mode 100644 index 0000000000..b7837035b1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power8.S @@ -0,0 +1,28 @@ +/* Optimized memcmp implementation for PowerPC64/POWER8. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCMP __memcmp_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power8/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-ppc64.c new file mode 100644 index 0000000000..3bd035dc49 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-ppc64.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define MEMCMP __memcmp_ppc +#undef weak_alias +#define weak_alias(name, aliasname) \ + extern __typeof (__memcmp_ppc) aliasname \ + __attribute__ ((weak, alias ("__memcmp_ppc"))); +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__memcmp_ppc, __GI_memcmp, __memcmp_ppc); +#endif + +extern __typeof (memcmp) __memcmp_ppc attribute_hidden; + +#include <string/memcmp.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp.c new file mode 100644 index 0000000000..0d315d5e70 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp.c @@ -0,0 +1,44 @@ +/* Multiple versions of memcmp. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# define memcmp __redirect_memcmp +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (memcmp) __memcmp_ppc attribute_hidden; +extern __typeof (memcmp) __memcmp_power4 attribute_hidden; +extern __typeof (memcmp) __memcmp_power7 attribute_hidden; +extern __typeof (memcmp) __memcmp_power8 attribute_hidden; +# undef memcmp + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_memcmp, memcmp, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __memcmp_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memcmp_power7 + : (hwcap & PPC_FEATURE_POWER4) + ? __memcmp_power4 + : __memcmp_ppc); +#else +#include <string/memcmp.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S new file mode 100644 index 0000000000..a942287900 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC A2. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCPY __memcpy_a2 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/a2/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S new file mode 100644 index 0000000000..39aa30c729 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC/CELL. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCPY __memcpy_cell + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/cell/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S new file mode 100644 index 0000000000..6e7fea382b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC64/POWER4. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCPY __memcpy_power4 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power4/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S new file mode 100644 index 0000000000..40bcdb1161 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC/POWER6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCPY __memcpy_power6 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power6/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S new file mode 100644 index 0000000000..222936af63 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCPY __memcpy_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S new file mode 100644 index 0000000000..2dc644c809 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S @@ -0,0 +1,28 @@ +/* Default memcpy implementation for PowerPC64. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# define MEMCPY __memcpy_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +#include <sysdeps/powerpc/powerpc64/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy.c new file mode 100644 index 0000000000..9f4286c4fe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy.c @@ -0,0 +1,55 @@ +/* Multiple versions of memcpy. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in lib and for + DSO. In static binaries we need memcpy before the initialization + happened. */ +#if defined SHARED && IS_IN (libc) +/* Redefine memcpy so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef memcpy +# define memcpy __redirect_memcpy +# include <string.h> +# include "init-arch.h" + +extern __typeof (__redirect_memcpy) __libc_memcpy; + +extern __typeof (__redirect_memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_power4 attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_cell attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_power6 attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_a2 attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_power7 attribute_hidden; + +libc_ifunc (__libc_memcpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memcpy_power7 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __memcpy_a2 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __memcpy_power6 : + (hwcap & PPC_FEATURE_CELL_BE) + ? __memcpy_cell : + (hwcap & PPC_FEATURE_POWER4) + ? __memcpy_power4 + : __memcpy_ppc); + +#undef memcpy +strong_alias (__libc_memcpy, memcpy); +libc_hidden_ver (__libc_memcpy, memcpy); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-power7.S new file mode 100644 index 0000000000..a9435fa654 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-power7.S @@ -0,0 +1,29 @@ +/* Optimized memmove implementation for PowerPC64/POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMMOVE __memmove_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef bcopy +#define bcopy __bcopy_power7 + +#include <sysdeps/powerpc/powerpc64/power7/memmove.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c new file mode 100644 index 0000000000..80353c5332 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c @@ -0,0 +1,44 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <memcopy.h> + +extern __typeof (_wordcopy_fwd_aligned) _wordcopy_fwd_aligned_ppc; +extern __typeof (_wordcopy_fwd_dest_aligned) _wordcopy_fwd_dest_aligned_ppc; +extern __typeof (_wordcopy_bwd_aligned) _wordcopy_bwd_aligned_ppc; +extern __typeof (_wordcopy_bwd_dest_aligned) _wordcopy_bwd_dest_aligned_ppc; + +#define _wordcopy_fwd_aligned _wordcopy_fwd_aligned_ppc +#define _wordcopy_fwd_dest_aligned _wordcopy_fwd_dest_aligned_ppc +#define _wordcopy_bwd_aligned _wordcopy_bwd_aligned_ppc +#define _wordcopy_bwd_dest_aligned _wordcopy_bwd_dest_aligned_ppc + +extern __typeof (memmove) __memmove_ppc attribute_hidden; +#define MEMMOVE __memmove_ppc + +extern __typeof (memcpy) __memcpy_ppc attribute_hidden; +#ifdef SHARED +# define memcpy __memcpy_ppc +#endif + +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +#include <string/memmove.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove.c new file mode 100644 index 0000000000..db2bbc7837 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove.c @@ -0,0 +1,45 @@ +/* Multiple versions of memmove. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in lib and for + DSO. In static binaries we need memmove before the initialization + happened. */ +#if defined SHARED && IS_IN (libc) +/* Redefine memmove so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef memmove +# define memmove __redirect_memmove +# include <string.h> +# include "init-arch.h" + +extern __typeof (__redirect_memmove) __libc_memmove; + +extern __typeof (__redirect_memmove) __memmove_ppc attribute_hidden; +extern __typeof (__redirect_memmove) __memmove_power7 attribute_hidden; + +libc_ifunc (__libc_memmove, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memmove_power7 + : __memmove_ppc); + +#undef memmove +strong_alias (__libc_memmove, memmove); +libc_hidden_ver (__libc_memmove, memmove); +#else +# include <string/memmove.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-power7.S new file mode 100644 index 0000000000..08f133644a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-power7.S @@ -0,0 +1,28 @@ +/* Optimized mempcpy implementation for PowerPC/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMPCPY __mempcpy_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#include <sysdeps/powerpc/powerpc64/power7/mempcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-ppc64.c new file mode 100644 index 0000000000..d0741fe318 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-ppc64.c @@ -0,0 +1,19 @@ +/* PowerPC64 default implementation of mempcpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy.c new file mode 100644 index 0000000000..430557ee0a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy.c @@ -0,0 +1,44 @@ +/* Multiple versions of mempcpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define mempcpy __redirect_mempcpy +# define __mempcpy __redirect___mempcpy +# define NO_MEMPCPY_STPCPY_REDIRECT +/* Omit the mempcpy inline definitions because it would redefine mempcpy. */ +# define _HAVE_STRING_ARCH_mempcpy 1 +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__mempcpy) __mempcpy_ppc attribute_hidden; +extern __typeof (__mempcpy) __mempcpy_power7 attribute_hidden; +# undef mempcpy +# undef __mempcpy + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect___mempcpy, __mempcpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __mempcpy_power7 + : __mempcpy_ppc); + +weak_alias (__mempcpy, mempcpy) +#else +# include <string/mempcpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-power7.S new file mode 100644 index 0000000000..052aa732ba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-power7.S @@ -0,0 +1,28 @@ +/* Optimized memrchr implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMRCHR __memrchr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power7/memrchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c new file mode 100644 index 0000000000..2fc706db71 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c @@ -0,0 +1,19 @@ +/* PowerPC64 default implementation of memrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr.c new file mode 100644 index 0000000000..fb09fdf89c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr.c @@ -0,0 +1,37 @@ +/* Multiple versions of memrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__memrchr) __memrchr_ppc attribute_hidden; +extern __typeof (__memrchr) __memrchr_power7 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__memrchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memrchr_power7 + : __memrchr_ppc); + +weak_alias (__memrchr, memrchr) +#else +#include <string/memrchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S new file mode 100644 index 0000000000..3908e8e412 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S @@ -0,0 +1,29 @@ +/* Optimized memset implementation for PowerPC64/POWER4. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMSET __memset_power4 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power4 + +#include <sysdeps/powerpc/powerpc64/power4/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power6.S new file mode 100644 index 0000000000..4ddbd2e274 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power6.S @@ -0,0 +1,29 @@ +/* Optimized memset implementation for PowerPC64/POWER6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMSET __memset_power6 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power6 + +#include <sysdeps/powerpc/powerpc64/power6/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power7.S new file mode 100644 index 0000000000..97f686b35d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power7.S @@ -0,0 +1,28 @@ +/* Optimized memset implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMSET __memset_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power7 +#include <sysdeps/powerpc/powerpc64/power7/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S new file mode 100644 index 0000000000..ea303533f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S @@ -0,0 +1,29 @@ +/* Optimized memset implementation for PowerPC64/POWER8. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMSET __memset_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power8 + +#include <sysdeps/powerpc/powerpc64/power8/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S new file mode 100644 index 0000000000..0f16e21c61 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S @@ -0,0 +1,42 @@ +/* Default memset/bzero implementation for PowerPC64. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. NOTE: this code should be positioned + before ENTRY/END_GEN_TB redefinition. */ +ENTRY (__bzero_ppc) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END_GEN_TB (__bzero_ppc,TB_TOCLESS) + + +#if defined SHARED && IS_IN (libc) +# define MEMSET __memset_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +/* Do not implement __bzero at powerpc64/memset.S. */ +#define NO_BZERO_IMPL + +#include <sysdeps/powerpc/powerpc64/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset.c new file mode 100644 index 0000000000..a5d9b3e60e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset.c @@ -0,0 +1,53 @@ +/* Multiple versions of memset. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +/* Redefine memset so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef memset +# define memset __redirect_memset +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__redirect_memset) __libc_memset; + +extern __typeof (__redirect_memset) __memset_ppc attribute_hidden; +extern __typeof (__redirect_memset) __memset_power4 attribute_hidden; +extern __typeof (__redirect_memset) __memset_power6 attribute_hidden; +extern __typeof (__redirect_memset) __memset_power7 attribute_hidden; +extern __typeof (__redirect_memset) __memset_power8 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__libc_memset, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __memset_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memset_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __memset_power6 : + (hwcap & PPC_FEATURE_POWER4) + ? __memset_power4 + : __memset_ppc); + +#undef memset +strong_alias (__libc_memset, memset); +libc_hidden_ver (__libc_memset, memset); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-power7.S new file mode 100644 index 0000000000..d79d72820c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-power7.S @@ -0,0 +1,23 @@ +/* Optimized rawmemchr implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define RAWMEMCHR __rawmemchr_power7 + +#include <sysdeps/powerpc/powerpc64/power7/rawmemchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-ppc64.c new file mode 100644 index 0000000000..cb55dbcc50 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-ppc64.c @@ -0,0 +1,19 @@ +/* PowerPC64 default implementation of rawmemchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c new file mode 100644 index 0000000000..8bfd58dd47 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c @@ -0,0 +1,39 @@ +/* Multiple versions of rawmemchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define __rawmemchr __redirect___rawmemchr +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__rawmemchr) __rawmemchr_ppc attribute_hidden; +extern __typeof (__rawmemchr) __rawmemchr_power7 attribute_hidden; +# undef __rawmemchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect___rawmemchr, __rawmemchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __rawmemchr_power7 + : __rawmemchr_ppc); + +weak_alias (__rawmemchr, rawmemchr) +#else +#include <string/rawmemchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-memset.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-memset.c new file mode 100644 index 0000000000..7fb4b733e4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-memset.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc64/rtld-memset.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-strchr.S new file mode 100644 index 0000000000..16ba7264c4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-strchr.S @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc64/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c new file mode 100644 index 0000000000..e4b9ce9b6f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c @@ -0,0 +1,36 @@ +/* Multiarch stpcpy for POWER7/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; +extern __typeof (strlen) __strlen_power7 attribute_hidden; +extern __typeof (stpcpy) __stpcpy_power7 attribute_hidden; + +#define STPCPY __stpcpy_power7 +#define memcpy __memcpy_power7 +#define strlen __strlen_power7 + +#undef libc_hidden_def +#define libc_hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <string/stpcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power8.S new file mode 100644 index 0000000000..935347115a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power8.S @@ -0,0 +1,26 @@ +/* Optimized stpcpy implementation for POWER8/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STPCPY __stpcpy_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/stpcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c new file mode 100644 index 0000000000..b5a3b12c05 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c @@ -0,0 +1,37 @@ +/* Multiarch stpcpy for PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (strlen) __strlen_ppc attribute_hidden; +extern __typeof (stpcpy) __stpcpy_ppc attribute_hidden; + +#define STPCPY __stpcpy_ppc +#define memcpy __memcpy_ppc +#define strlen __strlen_ppc + +#undef weak_alias +#define weak_alias(name, aliasname) + +#undef libc_hidden_def +#define libc_hidden_def(name) +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <string/stpcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c new file mode 100644 index 0000000000..3e34e3cafe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c @@ -0,0 +1,41 @@ +/* Multiple versions of stpcpy. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +# define NO_MEMPCPY_STPCPY_REDIRECT +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__stpcpy) __stpcpy_ppc attribute_hidden; +extern __typeof (__stpcpy) __stpcpy_power7 attribute_hidden; +extern __typeof (__stpcpy) __stpcpy_power8 attribute_hidden; + +libc_ifunc_hidden (__stpcpy, __stpcpy, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __stpcpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __stpcpy_power7 + : __stpcpy_ppc); + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_def (stpcpy) +#else +# include <string/stpcpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power7.S new file mode 100644 index 0000000000..6636b01d07 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power7.S @@ -0,0 +1,30 @@ +/* Optimized stpncpy implementation for POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define USE_AS_STPNCPY + +#define STPNCPY __stpncpy_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define MEMSET __memset_power7 + +#include <sysdeps/powerpc/powerpc64/power7/stpncpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S new file mode 100644 index 0000000000..6ce706a879 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S @@ -0,0 +1,28 @@ +/* Optimized stpncpy implementation for POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define USE_AS_STPNCPY + +#define STPNCPY __stpncpy_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/stpncpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-ppc64.c new file mode 100644 index 0000000000..22186166a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-ppc64.c @@ -0,0 +1,26 @@ +/* Default stpncpy implementation for PowerPC64. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define STPNCPY __stpncpy_ppc +#ifdef SHARED +#undef libc_hidden_def +#define libc_hidden_def(name) \ + __hidden_ver1 (__stpncpy_ppc, __GI___stpncpy, __stpncpy_ppc); +#endif + +#include <string/stpncpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c new file mode 100644 index 0000000000..e9b37dcc9a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c @@ -0,0 +1,39 @@ +/* Multiple versions of stpncpy. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define stpncpy __redirect_stpncpy +# define __stpncpy __redirect___stpncpy +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__stpncpy) __stpncpy_ppc attribute_hidden; +extern __typeof (__stpncpy) __stpncpy_power7 attribute_hidden; +extern __typeof (__stpncpy) __stpncpy_power8 attribute_hidden; +# undef stpncpy +# undef __stpncpy + +libc_ifunc_redirected (__redirect___stpncpy, __stpncpy, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __stpncpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __stpncpy_power7 + : __stpncpy_ppc); +weak_alias (__stpncpy, stpncpy) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S new file mode 100644 index 0000000000..025c5a9f13 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S @@ -0,0 +1,28 @@ +/* Optimized strcasecmp implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strcasecmp __strcasecmp_power7 +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S new file mode 100644 index 0000000000..9b62476e09 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S @@ -0,0 +1,28 @@ +/* Optimized strcasecmp implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strcasecmp __strcasecmp_power8 +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c new file mode 100644 index 0000000000..cbf91755da --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c @@ -0,0 +1,21 @@ +/* Multiarch strcasecmp for PPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define strcasecmp __strcasecmp_ppc + +#include <string/strcasecmp.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c new file mode 100644 index 0000000000..dcb4ef4125 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c @@ -0,0 +1,36 @@ +/* Multiple versions of strcasecmp + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__strcasecmp) __libc_strcasecmp; + +extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; +extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; +extern __typeof (__strcasecmp) __strcasecmp_power8 attribute_hidden; + +libc_ifunc (__libc_strcasecmp, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcasecmp_power8: + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcasecmp_power7 + : __strcasecmp_ppc); + +weak_alias (__libc_strcasecmp, strcasecmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l-power7.S new file mode 100644 index 0000000000..da4c4054c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l-power7.S @@ -0,0 +1,31 @@ +/* Optimized strcasecmp_l implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strcasecmp __strcasecmp_l_power7 + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define USE_IN_EXTENDED_LOCALE_MODEL + +#include <sysdeps/powerpc/powerpc64/power7/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c new file mode 100644 index 0000000000..10b8f2e84d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c @@ -0,0 +1,40 @@ +/* Multiple versions of strcasecmp_l. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# define strcasecmp_l __strcasecmp_l_ppc +extern __typeof (__strcasecmp_l) __strcasecmp_l_ppc attribute_hidden; +extern __typeof (__strcasecmp_l) __strcasecmp_l_power7 attribute_hidden; +#endif + +#include <string/strcasecmp_l.c> +#undef strcasecmp_l + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strcasecmp_l) __libc_strcasecmp_l; +libc_ifunc (__libc_strcasecmp_l, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcasecmp_l_power7 + : __strcasecmp_l_ppc); + +weak_alias (__libc_strcasecmp_l, strcasecmp_l) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S new file mode 100644 index 0000000000..2cfb5ae77a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S @@ -0,0 +1,35 @@ +/* Optimized strcasestr implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCASESTR __strcasestr_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +/* The following definitions are used in strcasestr optimization. */ + +/* strlen is used to calculate len of r4. */ +#define STRLEN __strlen_power8 +/* strnlen is used to check if len of r3 is more than r4. */ +#define STRNLEN __strnlen_power7 +/* strchr is used to check if first char of r4 is present in r3. */ +#define STRCHR __strchr_power8 + +#include <sysdeps/powerpc/powerpc64/power8/strcasestr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c new file mode 100644 index 0000000000..61f278f697 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c @@ -0,0 +1,34 @@ +/* PowerPC64 default implementation of strcasestr. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRCASESTR __strcasestr_ppc +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strcasestr_ppc, __GI_strcasestr, __strcasestr_ppc); +#endif + + +#undef weak_alias +#define weak_alias(a,b) + +extern __typeof (strcasestr) __strcasestr_ppc attribute_hidden; + +#include <string/strcasestr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c new file mode 100644 index 0000000000..9e6a16d6a9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c @@ -0,0 +1,37 @@ +/* Multiple versions of strcasestr. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strcasestr) __strcasestr_ppc attribute_hidden; +extern __typeof (__strcasestr) __strcasestr_power8 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__strcasestr, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcasestr_power8 + : __strcasestr_ppc); + +weak_alias (__strcasestr, strcasestr) +#else +#include <string/strcasestr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c new file mode 100644 index 0000000000..22d2caaec3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c @@ -0,0 +1,30 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRCAT __strcat_power7 + +#undef libc_hidden_def +#define libc_hidden_def(name) + +extern typeof (strcpy) __strcpy_power7; +extern typeof (strlen) __strlen_power7; + +#define strcpy __strcpy_power7 +#define strlen __strlen_power7 +#include <string/strcat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c new file mode 100644 index 0000000000..f138beec67 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c @@ -0,0 +1,30 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRCAT __strcat_power8 + +#undef libc_hidden_def +#define libc_hidden_def(name) + +extern typeof (strcpy) __strcpy_power8; +extern typeof (strlen) __strlen_power8; + +#define strcpy __strcpy_power8 +#define strlen __strlen_power8 +#include <string/strcat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c new file mode 100644 index 0000000000..5049fc03f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c @@ -0,0 +1,29 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRCAT __strcat_ppc +#ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcat_ppc, __GI_strcat, __strcat_ppc); +#endif + +extern __typeof (strcat) __strcat_ppc attribute_hidden; + +#include <string/strcat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat.c new file mode 100644 index 0000000000..3336aedcec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat.c @@ -0,0 +1,36 @@ +/* Multiple versions of strcat. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define strcat __redirect_strcat +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strcat) __strcat_ppc attribute_hidden; +extern __typeof (strcat) __strcat_power7 attribute_hidden; +extern __typeof (strcat) __strcat_power8 attribute_hidden; +# undef strcat + +libc_ifunc_redirected (__redirect_strcat, strcat, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcat_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcat_power7 + : __strcat_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power7.S new file mode 100644 index 0000000000..e64c0b7c82 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power7.S @@ -0,0 +1,26 @@ +/* Optimized strchr implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCHR __strchr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S new file mode 100644 index 0000000000..bbda7b0505 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S @@ -0,0 +1,26 @@ +/* Optimized strchr implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCHR __strchr_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-ppc64.S new file mode 100644 index 0000000000..769f9f07d4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-ppc64.S @@ -0,0 +1,29 @@ +/* PowerPC64 default implementation of strchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifdef SHARED +# define STRCHR __strchr_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strchr; __GI_strchr = __strchr_ppc +#endif + +#include <sysdeps/powerpc/powerpc64/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr.c new file mode 100644 index 0000000000..573105818f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr.c @@ -0,0 +1,42 @@ +/* Multiple versions of strchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +# define strchr __redirect_strchr +/* Omit the strchr inline definitions because it would redefine strchr. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strchr) __strchr_ppc attribute_hidden; +extern __typeof (strchr) __strchr_power7 attribute_hidden; +extern __typeof (strchr) __strchr_power8 attribute_hidden; +# undef strchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strchr, strchr, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strchr_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strchr_power7 + : __strchr_ppc); +weak_alias (strchr, index) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S new file mode 100644 index 0000000000..c8e28721fd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S @@ -0,0 +1,26 @@ +/* Optimized strchrnul implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCHRNUL __strchrnul_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strchrnul.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S new file mode 100644 index 0000000000..1cd39fc1b3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S @@ -0,0 +1,26 @@ +/* Optimized strchrnul implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCHRNUL __strchrnul_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strchrnul.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c new file mode 100644 index 0000000000..8d313c3e1d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c @@ -0,0 +1,19 @@ +/* PowerPC64 default implementation of strchrnul. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c new file mode 100644 index 0000000000..1e9018f88a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c @@ -0,0 +1,40 @@ +/* Multiple versions of strchrnul. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strchrnul) __strchrnul_ppc attribute_hidden; +extern __typeof (__strchrnul) __strchrnul_power7 attribute_hidden; +extern __typeof (__strchrnul) __strchrnul_power8 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__strchrnul, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strchrnul_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strchrnul_power7 + : __strchrnul_ppc); + +weak_alias (__strchrnul, strchrnul) +#else +#include <string/strchrnul.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power7.S new file mode 100644 index 0000000000..82d1b63af9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power7.S @@ -0,0 +1,26 @@ +/* Optimized strcmp implementation for POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCMP __strcmp_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power8.S new file mode 100644 index 0000000000..b2464a8018 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power8.S @@ -0,0 +1,26 @@ +/* Optimized strcmp implementation for POWER8/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCMP __strcmp_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S new file mode 100644 index 0000000000..48ea05d2c5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S @@ -0,0 +1,26 @@ +/* Optimized strcmp implementation for POWER9/PPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCMP __strcmp_power9 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power9/strcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S new file mode 100644 index 0000000000..085e74758f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S @@ -0,0 +1,29 @@ +/* Default strcmp implementation for PowerPC64. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# define STRCMP __strcmp_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strcmp; __GI_strcmp = __strcmp_ppc +#endif /* SHARED && IS_IN */ + +#include <sysdeps/powerpc/powerpc64/strcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp.c new file mode 100644 index 0000000000..fc10205b00 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp.c @@ -0,0 +1,42 @@ +/* Multiple versions of strcmp. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +# define strcmp __redirect_strcmp +/* Omit the strcmp inline definitions because it would redefine strcmp. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strcmp) __strcmp_ppc attribute_hidden; +extern __typeof (strcmp) __strcmp_power7 attribute_hidden; +extern __typeof (strcmp) __strcmp_power8 attribute_hidden; +extern __typeof (strcmp) __strcmp_power9 attribute_hidden; + +# undef strcmp + +libc_ifunc_redirected (__redirect_strcmp, strcmp, + (hwcap2 & PPC_FEATURE2_ARCH_3_00) + ? __strcmp_power9 : + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcmp_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcmp_power7 + : __strcmp_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c new file mode 100644 index 0000000000..892a551183 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c @@ -0,0 +1,32 @@ +/* Multiarch strcpy for POWER7/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; +extern __typeof (strlen) __strlen_power7 attribute_hidden; +extern __typeof (strcpy) __strcpy_power7 attribute_hidden; + +#define STRCPY __strcpy_power7 +#define memcpy __memcpy_power7 +#define strlen __strlen_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <string/strcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power8.S new file mode 100644 index 0000000000..6c753b5d1a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power8.S @@ -0,0 +1,26 @@ +/* Optimized strcpy implementation for POWER8/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCPY __strcpy_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c new file mode 100644 index 0000000000..cd6dd09541 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c @@ -0,0 +1,35 @@ +/* Multiarch strcpy for PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#if defined SHARED && IS_IN (libc) +extern __typeof (memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (strlen) __strlen_ppc attribute_hidden; +extern __typeof (strcpy) __strcpy_ppc attribute_hidden; + +# define STRCPY __strcpy_ppc +# define memcpy __memcpy_ppc +# define strlen __strlen_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcpy_ppc, __GI_strcpy, __strcpy_ppc); +#endif + +#include <string/strcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy.c new file mode 100644 index 0000000000..0da53e30b0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy.c @@ -0,0 +1,36 @@ +/* Multiple versions of strcpy. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +# define strcpy __redirect_strcpy +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strcpy) __strcpy_ppc attribute_hidden; +extern __typeof (strcpy) __strcpy_power7 attribute_hidden; +extern __typeof (strcpy) __strcpy_power8 attribute_hidden; +#undef strcpy + +libc_ifunc_redirected (__redirect_strcpy, strcpy, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcpy_power7 + : __strcpy_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S new file mode 100644 index 0000000000..39b4cd8239 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S @@ -0,0 +1,25 @@ +/* Optimized strcspn implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRSPN __strcspn_power8 +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcspn.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c new file mode 100644 index 0000000000..96396af125 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c @@ -0,0 +1,26 @@ +/* Default strcspn implementation for PowerPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define STRCSPN __strcspn_ppc + +#ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) +#endif + +#include <string/strcspn.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn.c new file mode 100644 index 0000000000..a6df885181 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn.c @@ -0,0 +1,35 @@ +/* Multiple versions of strcspn. PowerPC64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <shlib-compat.h> +#include "init-arch.h" + +#undef strcspn +extern __typeof (strcspn) __libc_strcspn; + +extern __typeof (strcspn) __strcspn_ppc attribute_hidden; +extern __typeof (strcspn) __strcspn_power8 attribute_hidden; + +libc_ifunc (__libc_strcspn, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcspn_power8 + : __strcspn_ppc); + +weak_alias (__libc_strcspn, strcspn) +libc_hidden_builtin_def (strcspn) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power7.S new file mode 100644 index 0000000000..333496efa5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power7.S @@ -0,0 +1,26 @@ +/* Optimized strlen implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRLEN __strlen_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S new file mode 100644 index 0000000000..b4deea5f93 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S @@ -0,0 +1,26 @@ +/* Optimized strlen implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRLEN __strlen_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S new file mode 100644 index 0000000000..13231b8c64 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S @@ -0,0 +1,28 @@ +/* Default strlen implementation for PowerPC64. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# define STRLEN __strlen_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +#include <sysdeps/powerpc/powerpc64/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen.c new file mode 100644 index 0000000000..a5a7b59558 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen.c @@ -0,0 +1,44 @@ +/* Multiple versions of strlen. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +/* Redefine strlen so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef strlen +# define strlen __redirect_strlen +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__redirect_strlen) __libc_strlen; + +extern __typeof (__redirect_strlen) __strlen_ppc attribute_hidden; +extern __typeof (__redirect_strlen) __strlen_power7 attribute_hidden; +extern __typeof (__redirect_strlen) __strlen_power8 attribute_hidden; + +libc_ifunc (__libc_strlen, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strlen_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strlen_power7 + : __strlen_ppc); + +#undef strlen +strong_alias (__libc_strlen, strlen) +libc_hidden_ver (__libc_strlen, strlen) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c new file mode 100644 index 0000000000..177da4a2f0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c @@ -0,0 +1,24 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define __strncasecmp __strncasecmp_power7 + +extern __typeof (strncasecmp) __strncasecmp_power7 attribute_hidden; + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S new file mode 100644 index 0000000000..8a24c34719 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S @@ -0,0 +1,28 @@ +/* Optimized strncasecmp implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strncasecmp __strncasecmp_power8 +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strncase.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c new file mode 100644 index 0000000000..0a75f75745 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c @@ -0,0 +1,21 @@ +/* Multiarch strncasecmp for PPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define strncasecmp __strncasecmp_ppc + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase.c new file mode 100644 index 0000000000..197f7133e4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase.c @@ -0,0 +1,36 @@ +/* Multiple versions of strncasecmp + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__strncasecmp) __libc_strncasecmp; + +extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; +extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; +extern __typeof (__strncasecmp) __strncasecmp_power8 attribute_hidden; + +libc_ifunc (__libc_strncasecmp, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncasecmp_power8: + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncasecmp_power7 + : __strncasecmp_ppc); + +weak_alias (__libc_strncasecmp, strncasecmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c new file mode 100644 index 0000000000..f87ff6c640 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c @@ -0,0 +1,25 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define __strncasecmp_l __strncasecmp_l_power7 +#define USE_IN_EXTENDED_LOCALE_MODEL 1 + +extern __typeof (strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c new file mode 100644 index 0000000000..6c2429c58a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c @@ -0,0 +1,42 @@ +/* Multiple versions of strncasecmp_l + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# define strncasecmp_l __strncasecmp_l_ppc +extern __typeof (__strncasecmp_l) __strncasecmp_l_ppc attribute_hidden; +extern __typeof (__strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; +#endif + +#include <string/strncase_l.c> +#undef strncasecmp_l + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__strncasecmp_l) __libc_strncasecmp_l; +libc_ifunc (__libc_strncasecmp_l, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncasecmp_l_power7 + : __strncasecmp_l_ppc); + +weak_alias (__libc_strncasecmp_l, strncasecmp_l) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c new file mode 100644 index 0000000000..f695f834a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c @@ -0,0 +1,31 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRNCAT __strncat_power7 + +extern __typeof (strncat) __strncat_power7 attribute_hidden; +extern __typeof (strlen) __strlen_power7 attribute_hidden; +extern __typeof (strnlen) __strnlen_power7 attribute_hidden; +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; + +#define strlen __strlen_power7 +#define __strnlen __strnlen_power7 +#define memcpy __memcpy_power7 + +#include <string/strncat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c new file mode 100644 index 0000000000..1ec1259b95 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c @@ -0,0 +1,31 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRNCAT __strncat_power8 + +extern __typeof (strncat) __strncat_power8 attribute_hidden; +extern __typeof (strlen) __strlen_power8 attribute_hidden; +extern __typeof (strnlen) __strnlen_power8 attribute_hidden; +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; + +#define strlen __strlen_power8 +#define __strnlen __strnlen_power8 +#define memcpy __memcpy_power7 + +#include <string/strncat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c new file mode 100644 index 0000000000..e4c8c01105 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c @@ -0,0 +1,29 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRNCAT __strncat_ppc +#ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strncat_ppc, __GI_strncat, __strncat_ppc); +#endif + +extern __typeof (strncat) __strncat_ppc attribute_hidden; + +#include <string/strncat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat.c new file mode 100644 index 0000000000..72f283354e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat.c @@ -0,0 +1,34 @@ +/* Multiple versions of strncat. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncat) __strncat_ppc attribute_hidden; +extern __typeof (strncat) __strncat_power7 attribute_hidden; +extern __typeof (strncat) __strncat_power8 attribute_hidden; + +libc_ifunc (strncat, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncat_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncat_power7 + : __strncat_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S new file mode 100644 index 0000000000..01729a3bba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCMP __strncmp_power4 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power4/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S new file mode 100644 index 0000000000..a069d4b21c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCMP __strncmp_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power8.S new file mode 100644 index 0000000000..3cbcaada62 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power8.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCMP __strncmp_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S new file mode 100644 index 0000000000..6d0deaa6e6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCMP __strncmp_power9 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power9/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S new file mode 100644 index 0000000000..e4b93ae8f2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S @@ -0,0 +1,28 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# define STRNCMP __strncmp_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strncmp; __GI_strncmp = __strncmp_ppc +#endif + +#include <sysdeps/powerpc/powerpc64/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp.c new file mode 100644 index 0000000000..14122c65a4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp.c @@ -0,0 +1,47 @@ +/* Multiple versions of strncmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +# define strncmp __redirect_strncmp +/* Omit the strncmp inline definitions because it would redefine strncmp. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncmp) __strncmp_ppc attribute_hidden; +extern __typeof (strncmp) __strncmp_power4 attribute_hidden; +extern __typeof (strncmp) __strncmp_power7 attribute_hidden; +extern __typeof (strncmp) __strncmp_power8 attribute_hidden; +extern __typeof (strncmp) __strncmp_power9 attribute_hidden; +# undef strncmp + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strncmp, strncmp, + (hwcap2 & PPC_FEATURE2_ARCH_3_00) + ? __strncmp_power9 : + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncmp_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncmp_power7 + : (hwcap & PPC_FEATURE_POWER4) + ? __strncmp_power4 + : __strncmp_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power7.S new file mode 100644 index 0000000000..03f7f83448 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power7.S @@ -0,0 +1,28 @@ +/* Optimized strncpy implementation for POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCPY __strncpy_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define MEMSET __memset_power7 + +#include <sysdeps/powerpc/powerpc64/power7/strncpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S new file mode 100644 index 0000000000..17117eb7ec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S @@ -0,0 +1,29 @@ +/* Optimized strncpy implementation for POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCPY __strncpy_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +/* memset is used to pad the end of the string. */ +#define MEMSET __memset_power8 + +#include <sysdeps/powerpc/powerpc64/power8/strncpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c new file mode 100644 index 0000000000..32412974aa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRNCPY __strncpy_ppc +#undef weak_alias +#define weak_alias(name, aliasname) \ + extern __typeof (__strncpy_ppc) aliasname \ + __attribute__ ((weak, alias ("__strncpy_ppc"))); +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strncpy_ppc, __GI_strncpy, __strncpy_ppc); +#endif + +extern __typeof (strncpy) __strncpy_ppc attribute_hidden; + +#include <string/strncpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy.c new file mode 100644 index 0000000000..bb63c185e6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy.c @@ -0,0 +1,42 @@ +/* Multiple versions of strncpy. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# define strncpy __redirect_strncpy +/* Omit the strncpy inline definitions because it would redefine strncpy. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncpy) __strncpy_ppc attribute_hidden; +extern __typeof (strncpy) __strncpy_power7 attribute_hidden; +extern __typeof (strncpy) __strncpy_power8 attribute_hidden; +# undef strncpy + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strncpy, strncpy, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncpy_power7 + : __strncpy_ppc); + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power7.S new file mode 100644 index 0000000000..2f0a183e31 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power7.S @@ -0,0 +1,28 @@ +/* Optimized strnlen version for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNLEN __strnlen_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#include <sysdeps/powerpc/powerpc64/power7/strnlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power8.S new file mode 100644 index 0000000000..ccea15df10 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power8.S @@ -0,0 +1,28 @@ +/* Optimized strnlen version for POWER8. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strnlen __strnlen_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#include <sysdeps/powerpc/powerpc64/power8/strnlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-ppc64.c new file mode 100644 index 0000000000..708455a156 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen.c new file mode 100644 index 0000000000..7f89132aa5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen.c @@ -0,0 +1,41 @@ +/* Multiple versions of strnlen. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define strnlen __redirect_strnlen +# define __strnlen __redirect___strnlen +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strnlen) __strnlen_ppc attribute_hidden; +extern __typeof (__strnlen) __strnlen_power7 attribute_hidden; +extern __typeof (__strnlen) __strnlen_power8 attribute_hidden; +# undef strnlen +# undef __strnlen +libc_ifunc_redirected (__redirect___strnlen, __strnlen, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strnlen_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strnlen_power7 + : __strnlen_ppc); +weak_alias (__strnlen, strnlen) + +#else +#include <string/strnlen.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S new file mode 100644 index 0000000000..10bab2ec54 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S @@ -0,0 +1,26 @@ +/* Optimized strrchr implementation for POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRRCHR __strrchr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strrchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power8.S new file mode 100644 index 0000000000..23365a1446 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power8.S @@ -0,0 +1,39 @@ +/* Optimized strrchr implementation for POWER8. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .section ".text"; \ + ENTRY_2(__strrchr_power8) \ + .align ALIGNARG(2); \ + BODY_LABEL(__strrchr_power8): \ + cfi_startproc; \ + LOCALENTRY(__strrchr_power8) + +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(__strrchr_power8) \ + END_2(__strrchr_power8) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strrchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c new file mode 100644 index 0000000000..62b77a0bbe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRRCHR __strrchr_ppc + +#undef weak_alias +#define weak_alias(name, aliasname) + +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strrchr_ppc, __GI_strrchr, __strrchr_ppc); +#endif + +extern __typeof (strrchr) __strrchr_ppc attribute_hidden; + +#include <string/strrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr.c new file mode 100644 index 0000000000..0f94c9d6a1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr.c @@ -0,0 +1,40 @@ +/* Multiple versions of strrchr. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# define strrchr __redirect_strrchr +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strrchr) __strrchr_ppc attribute_hidden; +extern __typeof (strrchr) __strrchr_power7 attribute_hidden; +extern __typeof (strrchr) __strrchr_power8 attribute_hidden; +#undef strrchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strrchr, strrchr, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strrchr_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strrchr_power7 + : __strrchr_ppc); +weak_alias (strrchr, rindex) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S new file mode 100644 index 0000000000..f8487f1cbc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S @@ -0,0 +1,25 @@ +/* Optimized strspn implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRSPN __strspn_power8 +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strspn.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c new file mode 100644 index 0000000000..53d3d61651 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c @@ -0,0 +1,25 @@ +/* Default strspn implementation for PowerPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define STRSPN __strspn_ppc +#ifdef SHARED +#undef libc_hidden_def +#define libc_hidden_def(name) +#endif + +#include <string/strspn.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn.c new file mode 100644 index 0000000000..0957482766 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn.c @@ -0,0 +1,35 @@ +/* Multiple versions of strspn. PowerPC64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +#undef strspn +extern __typeof (strspn) __libc_strspn; + +extern __typeof (strspn) __strspn_ppc attribute_hidden; +extern __typeof (strspn) __strspn_power8 attribute_hidden; + +libc_ifunc (__libc_strspn, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strspn_power8 + : __strspn_ppc); + +weak_alias (__libc_strspn, strspn) +libc_hidden_builtin_def (strspn) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S new file mode 100644 index 0000000000..3991df74a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S @@ -0,0 +1,30 @@ +/* Optimized strstr implementation for POWER7. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRSTR __strstr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define STRLEN __strlen_power7 +#define STRNLEN __strnlen_power7 +#define STRCHR __strchr_power7 + +#include <sysdeps/powerpc/powerpc64/power7/strstr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c new file mode 100644 index 0000000000..37add12c87 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c @@ -0,0 +1,29 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRSTR __strstr_ppc +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strstr_ppc, __GI_strstr, __strstr_ppc); +#endif + +extern __typeof (strstr) __strstr_ppc attribute_hidden; + +#include <string/strstr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr.c new file mode 100644 index 0000000000..d903b2702b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr.c @@ -0,0 +1,36 @@ +/* Multiple versions of strstr. PowerPC64 version. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# define strstr __redirect_strstr +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strstr) __strstr_ppc attribute_hidden; +extern __typeof (strstr) __strstr_power7 attribute_hidden; +# undef strstr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strstr, strstr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strstr_power7 + : __strstr_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power6.c new file mode 100644 index 0000000000..080cb696a7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power6.c @@ -0,0 +1,19 @@ +/* wcschr.c - Wide Character Search for powerpc64/power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power6.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power7.c new file mode 100644 index 0000000000..8f4de0e857 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power7.c @@ -0,0 +1,19 @@ +/* wcschr.c - Wide Character Search for powerpc64/power7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-ppc64.c new file mode 100644 index 0000000000..e781e947fe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr.c new file mode 100644 index 0000000000..ca373e096f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr.c @@ -0,0 +1,43 @@ +/* Multiple versions of wcschr + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define wcschr __redirect_wcschr +# define __wcschr __redirect___wcschr +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcschr) __wcschr_ppc attribute_hidden; +extern __typeof (wcschr) __wcschr_power6 attribute_hidden; +extern __typeof (wcschr) __wcschr_power7 attribute_hidden; +# undef wcschr +# undef __wcschr + +libc_ifunc_redirected (__redirect___wcschr, __wcschr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcschr_power7 + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcschr_power6 + : __wcschr_ppc); +weak_alias (__wcschr, wcschr) +#else +#undef libc_hidden_def +#define libc_hidden_def(a) +#include <wcsmbs/wcschr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power6.c new file mode 100644 index 0000000000..89d8a39640 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power6.c @@ -0,0 +1,19 @@ +/* wcscpy.c - Wide Character Search for powerpc64/power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power6.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power7.c new file mode 100644 index 0000000000..47ba73b2cc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power7.c @@ -0,0 +1,19 @@ +/* wcscpy.c - Wide Character Search for powerpc64/power7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-ppc64.c new file mode 100644 index 0000000000..1924b235ef --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c new file mode 100644 index 0000000000..13e44afb09 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c @@ -0,0 +1,36 @@ +/* Multiple versions of wcscpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcscpy) __wcscpy_ppc attribute_hidden; +extern __typeof (wcscpy) __wcscpy_power6 attribute_hidden; +extern __typeof (wcscpy) __wcscpy_power7 attribute_hidden; + +libc_ifunc (wcscpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcscpy_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcscpy_power6 + : __wcscpy_ppc); +#else +#include <wcsmbs/wcscpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power6.c new file mode 100644 index 0000000000..5dc448b339 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power6.c @@ -0,0 +1,19 @@ +/* wcsrchr.c - Wide Character Search for powerpc64/power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power6.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power7.c new file mode 100644 index 0000000000..fa25aa0475 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power7.c @@ -0,0 +1,19 @@ +/* wcsrchr.c - Wide Character Search for powerpc64/power7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-ppc64.c new file mode 100644 index 0000000000..8a913412a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr.c new file mode 100644 index 0000000000..07590f5a90 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr.c @@ -0,0 +1,36 @@ +/* Multiple versions of wcsrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcsrchr) __wcsrchr_ppc attribute_hidden; +extern __typeof (wcsrchr) __wcsrchr_power6 attribute_hidden; +extern __typeof (wcsrchr) __wcsrchr_power7 attribute_hidden; + +libc_ifunc (wcsrchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcsrchr_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcsrchr_power6 + : __wcsrchr_ppc); +#else +#include <wcsmbs/wcsrchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wordcopy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wordcopy-ppc64.c new file mode 100644 index 0000000000..078156f5d9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wordcopy-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Implies new file mode 100644 index 0000000000..a372141bb7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Implies @@ -0,0 +1,2 @@ +powerpc/power4/fpu +powerpc/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Makefile new file mode 100644 index 0000000000..ba06adb5d0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Makefile @@ -0,0 +1,6 @@ +# Makefile fragment for POWER4/5/5+. + +ifeq ($(subdir),string) +CFLAGS-wordcopy.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops +CFLAGS-memmove.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/Implies new file mode 100644 index 0000000000..c1f617b7da --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/multiarch/Implies new file mode 100644 index 0000000000..8d6531a174 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcmp.S new file mode 100644 index 0000000000..6ca98e909c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcmp.S @@ -0,0 +1,1369 @@ +/* Optimized memcmp implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +#ifndef MEMCMP +# define MEMCMP memcmp +#endif + + .machine power4 +EALIGN (MEMCMP, 4, 0) + CALL_MCOUNT 3 + +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r8 /* next word in s1 */ +#define rWORD4 r9 /* next word in s2 */ +#define rWORD5 r10 /* next word in s1 */ +#define rWORD6 r11 /* next word in s2 */ +#define rWORD7 r30 /* next word in s1 */ +#define rWORD8 r31 /* next word in s2 */ + + xor r0, rSTR2, rSTR1 + cmpldi cr6, rN, 0 + cmpldi cr1, rN, 12 + clrldi. r0, r0, 61 + clrldi r12, rSTR1, 61 + cmpldi cr5, r12, 0 + beq- cr6, L(zeroLength) + dcbt 0, rSTR1 + dcbt 0, rSTR2 +/* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) + std rWORD8, -8(r1) + std rWORD7, -16(r1) + cfi_offset(rWORD8, -8) + cfi_offset(rWORD7, -16) + bne L(unaligned) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then we are already double word + aligned and can perform the DW aligned loop. + + Otherwise we know the two strings have the same alignment (but not + yet DW). So we force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DW aligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ + .align 4 +L(samealignment): + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + beq cr5, L(DWaligned) + add rN, rN, r12 + sldi rWORD6, r12, 3 + srdi r0, rN, 5 /* Divide by 32 */ + andi. r12, rN, 24 /* Get the DW remainder */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) +#endif + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dPs4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + +/* Remainder is 8 */ + .align 3 +L(dsP1): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + b L(dP1e) +/* Remainder is 16 */ + .align 4 +L(dPs2): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 8(rSTR1) + ld rWORD8, 8(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + b L(dP2e) +/* Remainder is 24 */ + .align 4 +L(dPs3): + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD2, rWORD6 + cmpld cr1, rWORD3, rWORD4 + b L(dP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dPs4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD2, rWORD6 + cmpld cr7, rWORD1, rWORD2 + b L(dP4e) + +/* At this point we know both strings are double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWaligned): + andi. r12, rN, 24 /* Get the DW remainder */ + srdi r0, rN, 5 /* Divide by 32 */ + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) + +/* Remainder is 8 */ + .align 4 +L(dP1): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 0(rSTR1) + ld rWORD6, 0(rSTR2) +#endif + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 +L(dP1e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5x) + bne cr7, L(dLcr7x) + +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 32(rSTR1) + ldu rWORD8, 32(rSTR2) +#endif + bne cr1, L(dLcr1) + cmpld cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + .align 3 +L(dP1x): + sldi. r12, rN, 3 + bne cr5, L(dLcr5x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + li rRTN, 0 + blr + +/* Remainder is 16 */ + .align 4 +L(dP2): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 0(rSTR1) + ld rWORD6, 0(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 8(rSTR1) + ld rWORD8, 8(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 +L(dP2e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 16(rSTR1) + ld rWORD2, 16(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 24(rSTR1) + ld rWORD4, 24(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) +/* Again we are on a early exit path (16-23 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP2x): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 8(rSTR1) + ld rWORD4, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + sldi. r12, rN, 3 + bne cr6, L(dLcr6x) +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr1, L(dLcr1x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + li rRTN, 0 + blr + +/* Remainder is 24 */ + .align 4 +L(dP3): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 0(rSTR1) + ld rWORD4, 0(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 +L(dP3e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 8(rSTR1) + ld rWORD6, 8(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 16(rSTR1) + ld rWORD8, 16(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 24(rSTR1) + ld rWORD2, 24(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#endif + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) +/* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP3x): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 16(rSTR1) + ld rWORD2, 16(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + sldi. r12, rN, 3 + bne cr1, L(dLcr1x) +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#endif + bne cr6, L(dLcr6x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne cr7, L(dLcr7x) + bne L(d00) + li rRTN, 0 + blr + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dP4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 +L(dP4e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 8(rSTR1) + ld rWORD4, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 16(rSTR1) + ld rWORD6, 16(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 24(rSTR1) + ldu rWORD8, 24(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(dLoop): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +L(dLoop1): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) +L(dLoop2): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) +L(dLoop3): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 32(rSTR1) + ldu rWORD8, 32(rSTR2) +#endif + bne- cr1, L(dLcr1) + cmpld cr7, rWORD1, rWORD2 + bdnz+ L(dLoop) + +L(dL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + cmpld cr5, rWORD7, rWORD8 +L(d44): + bne cr7, L(dLcr7) +L(d34): + bne cr1, L(dLcr1) +L(d24): + bne cr6, L(dLcr6) +L(d14): + sldi. r12, rN, 3 + bne cr5, L(dLcr5) +L(d04): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + beq L(zeroLength) +/* At this point we have a remainder of 1 to 7 bytes to compare. Since + we are aligned it is safe to load the whole double word, and use + shift right double to eliminate bits beyond the compare length. */ +L(d00): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + cmpld cr7, rWORD1, rWORD2 + bne cr7, L(dLcr7x) + li rRTN, 0 + blr + + .align 4 +L(dLcr7): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) +L(dLcr7x): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(dLcr1): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) +L(dLcr1x): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr + .align 4 +L(dLcr6): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) +L(dLcr6x): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + .align 4 +L(dLcr5): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) +L(dLcr5x): + li rRTN, 1 + bgtlr cr5 + li rRTN, -1 + blr + + .align 4 +L(bytealigned): + mtctr rN /* Power4 wants mtctr 1st in dispatch group */ +#if 0 +/* Huh? We've already branched on cr6! */ + beq- cr6, L(zeroLength) +#endif + +/* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) is 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. Effectively + the first iteration of the loop only serves to load operands and + branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. */ + + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz- L(b11) + cmpld cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz- L(b12) + cmpld cr1, rWORD3, rWORD4 + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz- L(b13) + .align 4 +L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + bne- cr7, L(bLcr7) + + cmpld cr6, rWORD5, rWORD6 + bdz- L(b3i) + + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + bne- cr1, L(bLcr1) + + cmpld cr7, rWORD1, rWORD2 + bdz- L(b2i) + + lbzu rWORD5, 1(rSTR1) + lbzu rWORD6, 1(rSTR2) + bne- cr6, L(bLcr6) + + cmpld cr1, rWORD3, rWORD4 + bdnz+ L(bLoop) + +/* We speculatively loading bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. In this + case the loop will exit early (before the all pending bytes are + tested. In this case we must complete the pending operations + before returning. */ +L(b1i): + bne- cr7, L(bLcr7) + bne- cr1, L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne- cr6, L(bLcr6) + bne- cr7, L(bLcr7) + b L(bx34) + .align 4 +L(b3i): + bne- cr1, L(bLcr1) + bne- cr6, L(bLcr6) + b L(bx12) + .align 4 +L(bLcr7): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr +L(bLcr1): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr +L(bLcr6): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + +L(b13): + bne- cr7, L(bx12) + bne- cr1, L(bx34) +L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop +L(b12): + bne- cr7, L(bx12) +L(bx34): + sub rRTN, rWORD3, rWORD4 + blr +L(b11): +L(bx12): + sub rRTN, rWORD1, rWORD2 + blr + .align 4 +L(zeroLength): + li rRTN, 0 + blr + + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then rStr1 is double word + aligned and can perform the DWunaligned loop. + + Otherwise we know that rSTR1 is not already DW aligned yet. + So we can force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DWaligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected resister pair. */ +#define rSHL r29 /* Unaligned shift left count. */ +#define rSHR r28 /* Unaligned shift right count. */ +#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ +#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ +#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ +#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */ +L(unaligned): + std rSHL, -24(r1) + cfi_offset(rSHL, -24) + clrldi rSHL, rSTR2, 61 + beq- cr6, L(duzeroLength) + std rSHR, -32(r1) + cfi_offset(rSHR, -32) + beq cr5, L(DWunaligned) + std rWORD8_SHIFT, -40(r1) + cfi_offset(rWORD8_SHIFT, -40) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 DW. */ + sub rWORD8_SHIFT, rSTR2, r12 +/* But do not attempt to address the DW before that DW that contains + the actual start of rSTR2. */ + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, -48(r1) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (DW aligned) start of rSTR1. */ + clrldi rSHL, rWORD8_SHIFT, 61 + clrrdi rSTR1, rSTR1, 3 + std rWORD4_SHIFT, -56(r1) + sldi rSHL, rSHL, 3 + cmpld cr5, rWORD8_SHIFT, rSTR2 + add rN, rN, r12 + sldi rWORD6, r12, 3 + std rWORD6_SHIFT, -64(r1) + cfi_offset(rWORD2_SHIFT, -48) + cfi_offset(rWORD4_SHIFT, -56) + cfi_offset(rWORD6_SHIFT, -64) + subfic rSHR, rSHL, 64 + srdi r0, rN, 5 /* Divide by 32 */ + andi. r12, rN, 24 /* Get the DW remainder */ +/* We normally need to load 2 DWs to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a DW where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. */ + li rWORD8, 0 + blt cr5, L(dus0) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD8, 0(rSTR2) + addi rSTR2, rSTR2, 8 +#endif + sld rWORD8, rWORD8, rSHL + +L(dus0): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) +#endif + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + srd r12, rWORD2, rSHR + clrldi rN, rN, 61 + beq L(duPs4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + +/* Remainder is 8 */ + .align 4 +L(dusP1): + sld rWORD8_SHIFT, rWORD2, rSHL + sld rWORD7, rWORD1, rWORD6 + sld rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duPs2): + sld rWORD6_SHIFT, rWORD2, rSHL + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD8, rWORD6 + b L(duP2e) +/* Remainder is 24 */ + .align 4 +L(duPs3): + sld rWORD4_SHIFT, rWORD2, rSHL + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD8, rWORD6 + b L(duP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duPs4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + or rWORD8, r12, rWORD8 + sld rWORD2_SHIFT, rWORD2, rSHL + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD8, rWORD6 + b L(duP4e) + +/* At this point we know rSTR1 is double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWunaligned): + std rWORD8_SHIFT, -40(r1) + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, -48(r1) + srdi r0, rN, 5 /* Divide by 32 */ + std rWORD4_SHIFT, -56(r1) + andi. r12, rN, 24 /* Get the DW remainder */ + std rWORD6_SHIFT, -64(r1) + cfi_offset(rWORD8_SHIFT, -40) + cfi_offset(rWORD2_SHIFT, -48) + cfi_offset(rWORD4_SHIFT, -56) + cfi_offset(rWORD6_SHIFT, -64) + sldi rSHL, rSHL, 3 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD6, 0, rSTR2 + addi rSTR2, rSTR2, 8 + ldbrx rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD6, 0(rSTR2) + ldu rWORD8, 8(rSTR2) +#endif + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + subfic rSHR, rSHL, 64 + sld rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(duP3) + beq cr1, L(duP2) + +/* Remainder is 8 */ + .align 4 +L(duP1): + srd r12, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + addi rSTR1, rSTR1, 8 +#else + ld rWORD7, 0(rSTR1) +#endif + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) +L(duP1e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) + or rWORD4, r12, rWORD2_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + bne cr7, L(duLcr7) + or rWORD6, r0, rWORD4_SHIFT + cmpld cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duP2): + srd r0, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + addi rSTR1, rSTR1, 8 +#else + ld rWORD5, 0(rSTR1) +#endif + or rWORD6, r0, rWORD6_SHIFT + sld rWORD6_SHIFT, rWORD8, rSHL +L(duP2e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 8(rSTR1) + ld rWORD8, 8(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 16(rSTR1) + ld rWORD2, 16(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 24(rSTR1) + ld rWORD4, 24(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + cmpld cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmpld cr5, rWORD7, rWORD8 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Remainder is 24 */ + .align 4 +L(duP3): + srd r12, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + addi rSTR1, rSTR1, 8 +#else + ld rWORD3, 0(rSTR1) +#endif + sld rWORD4_SHIFT, rWORD8, rSHL + or rWORD4, r12, rWORD6_SHIFT +L(duP3e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 8(rSTR1) + ld rWORD6, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 16(rSTR1) + ld rWORD8, 16(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 24(rSTR1) + ld rWORD2, 24(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#endif + cmpld cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#endif +#if 0 +/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duP4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + srd r0, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + addi rSTR1, rSTR1, 8 +#else + ld rWORD1, 0(rSTR1) +#endif + sld rWORD2_SHIFT, rWORD8, rSHL + or rWORD2, r0, rWORD6_SHIFT +L(duP4e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 8(rSTR1) + ld rWORD4, 8(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 16(rSTR1) + ld rWORD6, 16(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 24(rSTR1) + ldu rWORD8, 24(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + cmpld cr5, rWORD7, rWORD8 + bdz- L(du24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(duLoop): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +L(duLoop1): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +L(duLoop2): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +L(duLoop3): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 32(rSTR1) + ldu rWORD8, 32(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + bne- cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + bdnz+ L(duLoop) + +L(duL4): +#if 0 +/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) +#endif + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmpld cr5, rWORD7, rWORD8 +L(du44): + bne cr7, L(duLcr7) +L(du34): + bne cr1, L(duLcr1) +L(du24): + bne cr6, L(duLcr6) +L(du14): + sldi. rN, rN, 3 + bne cr5, L(duLcr5) +/* At this point we have a remainder of 1 to 7 bytes to compare. We use + shift right double to eliminate bits beyond the compare length. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rWORD8_SHIFT). */ + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + .align 4 +L(dutrim): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 +#else + ld rWORD1, 8(rSTR1) +#endif + ld rWORD8, -8(r1) + subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */ + or rWORD2, r0, rWORD8_SHIFT + ld rWORD7, -16(r1) + ld rSHL, -24(r1) + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + ld rSHR, -32(r1) + ld rWORD8_SHIFT, -40(r1) + li rRTN, 0 + cmpld cr7, rWORD1, rWORD2 + ld rWORD2_SHIFT, -48(r1) + ld rWORD4_SHIFT, -56(r1) + beq cr7, L(dureturn24) + li rRTN, 1 + ld rWORD6_SHIFT, -64(r1) + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(duLcr7): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + li rRTN, 1 + bgt cr7, L(dureturn29) + ld rSHL, -24(r1) + ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr1): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) + ld rSHL, -24(r1) + ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr6): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) + ld rSHL, -24(r1) + ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr5): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) + ld rSHL, -24(r1) + ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 3 +L(duZeroReturn): + li rRTN, 0 + .align 4 +L(dureturn): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) +L(dureturn29): + ld rSHL, -24(r1) + ld rSHR, -32(r1) +L(dureturn27): + ld rWORD8_SHIFT, -40(r1) +L(dureturn26): + ld rWORD2_SHIFT, -48(r1) +L(dureturn25): + ld rWORD4_SHIFT, -56(r1) +L(dureturn24): + ld rWORD6_SHIFT, -64(r1) + blr +L(duzeroLength): + li rRTN, 0 + blr + +END (MEMCMP) +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp, bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcopy.h b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcopy.h new file mode 100644 index 0000000000..9a4ff79f4a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcopy.h @@ -0,0 +1 @@ +#include "../../powerpc32/power4/memcopy.h" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcpy.S new file mode 100644 index 0000000000..2e96376b9f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcpy.S @@ -0,0 +1,477 @@ +/* Optimized memcpy implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + + Memcpy handles short copies (< 32-bytes) using a binary move blocks + (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled + with the appropriate combination of byte and halfword load/stores. + There is minimal effort to optimize the alignment of short moves. + The 64-bit implementations of POWER3 and POWER4 do a reasonable job + of handling unaligned load/stores that do not cross 32-byte boundaries. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination doubleword (8-byte) aligned. Further optimization is + possible when both source and destination are doubleword aligned. + Each case has a optimized unrolled loop. */ + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + .machine power4 +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + cmpldi cr1,5,31 + neg 0,3 + std 3,-16(1) + std 31,-8(1) + cfi_offset(31,-8) + andi. 11,3,7 /* check alignment of dst. */ + clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ + clrldi 10,4,61 /* check alignment of src. */ + cmpldi cr6,5,8 + ble- cr1,.L2 /* If move < 32 bytes use short move code. */ + cmpld cr6,10,11 + mr 12,4 + srdi 9,5,3 /* Number of full double words remaining. */ + mtcrf 0x01,0 + mr 31,5 + beq .L0 + + subf 31,0,5 + /* Move 0-7 bytes as needed to get the destination doubleword aligned. */ +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,4f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: bf 29,0f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +0: + clrldi 10,12,61 /* check alignment of src again. */ + srdi 9,31,3 /* Number of full double words remaining. */ + + /* Copy doublewords from source to destination, assuming the + destination is aligned on a doubleword boundary. + + At this point we know there are at least 25 bytes left (32-7) to copy. + The next step is to determine if the source is also doubleword aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. + + Otherwise source and destination are doubleword aligned, and we can + the optimized doubleword copy loop. */ +.L0: + clrldi 11,31,61 + mtcrf 0x01,9 + cmpldi cr1,11,0 + bne- cr6,.L6 /* If source is not DW aligned. */ + + /* Move doublewords where destination and source are DW aligned. + Use a unrolled loop to copy 4 doubleword (32-bytes) per iteration. + If the copy is not an exact multiple of 32 bytes, 1-3 + doublewords are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-7 bytes. These byte are + copied a word/halfword/byte at a time as needed to preserve alignment. */ + + srdi 8,31,5 + cmpldi cr1,9,4 + cmpldi cr6,11,0 + mr 11,12 + + bf 30,1f + ld 6,0(12) + ld 7,8(12) + addi 11,12,16 + mtctr 8 + std 6,0(3) + std 7,8(3) + addi 10,3,16 + bf 31,4f + ld 0,16(12) + std 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 + b 4f + .align 4 +1: + mr 10,3 + mtctr 8 + bf 31,4f + ld 6,0(12) + addi 11,12,8 + std 6,0(3) + addi 10,3,8 + + .align 4 +4: + ld 6,0(11) + ld 7,8(11) + ld 8,16(11) + ld 0,24(11) + addi 11,11,32 +2: + std 6,0(10) + std 7,8(10) + std 8,16(10) + std 0,24(10) + addi 10,10,32 + bdnz 4b +3: + + rldicr 0,31,0,60 + mtcrf 0x01,31 + beq cr6,0f +.L9: + add 3,3,0 + add 12,12,0 + +/* At this point we have a tail of 0-7 bytes and we know that the + destination is double word aligned. */ +4: bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: bf 30,1f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 31,-8(1) + ld 3,-16(1) + blr + +/* Copy up to 31 bytes. This divided into two cases 0-8 bytes and 9-31 + bytes. Each case is handled without loops, using binary (1,2,4,8) + tests. + + In the short (0-8 byte) case no attempt is made to force alignment + of either source or destination. The hardware will handle the + unaligned load/stores with small delays for crossing 32- 64-byte, and + 4096-byte boundaries. Since these short moves are unlikely to be + unaligned or cross these boundaries, the overhead to force + alignment is not justified. + + The longer (9-31 byte) move is more likely to cross 32- or 64-byte + boundaries. Since only loads are sensitive to the 32-/64-byte + boundaries it is more important to align the source then the + destination. If the source is not already word aligned, we first + move 1-3 bytes as needed. Since we are only word aligned we don't + use double word load/stores to insure that all loads are aligned. + While the destination and stores may still be unaligned, this + is only an issue for page (4096 byte boundary) crossing, which + should be rare for these short moves. The hardware handles this + case automatically with a small delay. */ + + .align 4 +.L2: + mtcrf 0x01,5 + neg 8,4 + clrrdi 11,4,2 + andi. 0,8,3 + ble cr6,.LE8 /* Handle moves of 0-8 bytes. */ +/* At least 9 bytes left. Get the source word aligned. */ + cmpldi cr1,5,16 + mr 10,5 + mr 12,4 + cmpldi cr6,0,2 + beq .L3 /* If the source is already word aligned skip this. */ +/* Copy 1-3 bytes to get source address word aligned. */ + lwz 6,0(11) + subf 10,0,5 + add 12,4,0 + blt cr6,5f + srdi 7,6,16 + bgt cr6,3f +#ifdef __LITTLE_ENDIAN__ + sth 7,0(3) +#else + sth 6,0(3) +#endif + b 7f + .align 4 +3: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,24 + stb 6,0(3) + sth 7,1(3) +#else + stb 7,0(3) + sth 6,1(3) +#endif + b 7f + .align 4 +5: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,8 +#endif + stb 6,0(3) +7: + cmpldi cr1,10,16 + add 3,3,0 + mtcrf 0x01,10 + .align 4 +.L3: +/* At least 6 bytes left and the source is word aligned. */ + blt cr1,8f +16: /* Move 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 6,8(12) + stw 7,4(3) + lwz 7,12(12) + addi 12,12,16 + stw 6,8(3) + stw 7,12(3) + addi 3,3,16 +8: /* Move 8 bytes. */ + bf 28,4f + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Move 4 bytes. */ + bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Move 2-3 bytes. */ + bf 30,1f + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + ld 3,-16(1) + blr +1: /* Move 1 byte. */ + bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 3,-16(1) + blr + +/* Special case to copy 0-8 bytes. */ + .align 4 +.LE8: + mr 12,4 + bne cr6,4f +/* Would have liked to use use ld/std here but the 630 processors are + slow for load/store doubles that are not at least word aligned. + Unaligned Load/Store word execute with only a 1 cycle penalty. */ + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + /* Return original dst pointer. */ + ld 3,-16(1) + blr + .align 4 +4: bf 29,2b + lwz 6,0(4) + stw 6,0(3) +6: + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + ld 3,-16(1) + blr + .align 4 +5: + bf 31,0f + lbz 6,4(4) + stb 6,4(3) + .align 4 +0: + /* Return original dst pointer. */ + ld 3,-16(1) + blr + + .align 4 +.L6: + + /* Copy doublewords where the destination is aligned but the source is + not. Use aligned doubleword loads from the source, shifted to realign + the data, to allow aligned destination stores. */ + addi 11,9,-1 /* loop DW count is one less than total */ + subf 5,10,12 + sldi 10,10,3 + mr 4,3 + srdi 8,11,2 /* calculate the 32 byte loop count */ + ld 6,0(5) + mtcrf 0x01,11 + cmpldi cr6,9,4 + mtctr 8 + ld 7,8(5) + subfic 9,10,64 + bf 30,1f + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 + sld 8,7,9 +#else + sld 0,6,10 + srd 8,7,9 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srd 0,7,10 + sld 8,6,9 +#else + sld 0,7,10 + srd 8,6,9 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,8f /* if total DWs = 3, then bypass loop */ + bf 31,4f + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 + sld 8,7,9 +#else + sld 0,6,10 + srd 8,7,9 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,8f /* if total DWs = 4, then bypass loop */ + b 4f + .align 4 +1: +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 + sld 8,7,9 +#else + sld 0,6,10 + srd 8,7,9 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,4f + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +4: +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 + sld 8,7,9 +#else + sld 0,6,10 + srd 8,7,9 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srd 0,7,10 + sld 8,6,9 +#else + sld 0,7,10 + srd 8,6,9 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 + sld 8,7,9 +#else + sld 0,6,10 + srd 8,7,9 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srd 0,7,10 + sld 8,6,9 +#else + sld 0,7,10 + srd 8,6,9 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ 4b + .align 4 +8: + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 + sld 8,7,9 +#else + sld 0,6,10 + srd 8,7,9 +#endif + or 0,0,8 + std 0,0(4) +3: + rldicr 0,31,0,60 + mtcrf 0x01,31 + bne cr1,.L9 /* If the tail is 0 bytes we are done! */ + /* Return original dst pointer. */ + ld 31,-8(1) + ld 3,-16(1) + blr +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memset.S new file mode 100644 index 0000000000..a57214e0b0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memset.S @@ -0,0 +1,251 @@ +/* Optimized memset implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. + + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (256 bits). There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + .machine power4 +EALIGN (MEMSET, 5, 0) + CALL_MCOUNT 3 + +#define rTMP r0 +#define rRTN r3 /* Initial value of 1st argument. */ +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ +#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ +#define rMEMP2 r8 + +#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */ +#define rCLS r8 /* Cache line size obtained from static. */ +#define rCLM r9 /* Cache line size mask to check for cache alignment. */ +L(_memset): +/* Take care of case for size <= 4. */ + cmpldi cr1, rLEN, 8 + andi. rALIGN, rMEMP0, 7 + mr rMEMP, rMEMP0 + ble- cr1, L(small) + +/* Align to doubleword boundary. */ + cmpldi cr5, rLEN, 31 + insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + beq+ L(aligned2) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 8 + cror 28,30,31 /* Detect odd word aligned. */ + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + bt 29, L(g4) +/* Process the even word of doubleword. */ + bf+ 31, L(g2) + stb rCHR, 0(rMEMP0) + bt 30, L(g4x) +L(g2): + sth rCHR, -6(rMEMP) +L(g4x): + stw rCHR, -4(rMEMP) + b L(aligned) +/* Process the odd word of doubleword. */ +L(g4): + bf 28, L(g4x) /* If false, word aligned on odd word. */ + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): + sth rCHR, -2(rMEMP) + +/* Handle the case of size < 31. */ +L(aligned2): + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ +L(aligned): + mtcrf 0x01, rLEN + ble cr5, L(medium) +/* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x18 + subfic rALIGN, rALIGN, 0x20 + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stdu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + std rCHR, -8(rMEMP2) + stdu rCHR, -16(rMEMP2) +L(a2): + +/* Now aligned to a 32 byte boundary. */ +L(caligned): + cmpldi cr1, rCHR, 0 + clrrdi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN + beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */ +L(nondcbz): + srdi rTMP, rALIGN, 5 + mtctr rTMP + beq L(medium) /* We may not actually get to do a full line. */ + clrldi. rLEN, rLEN, 59 + add rMEMP, rMEMP, rALIGN + li rNEG64, -0x40 + bdz L(cloopdone) + +L(c3): dcbtst rNEG64, rMEMP + std rCHR, -8(rMEMP) + std rCHR, -16(rMEMP) + std rCHR, -24(rMEMP) + stdu rCHR, -32(rMEMP) + bdnz L(c3) +L(cloopdone): + std rCHR, -8(rMEMP) + std rCHR, -16(rMEMP) + cmpldi cr1, rLEN, 16 + std rCHR, -24(rMEMP) + stdu rCHR, -32(rMEMP) + beqlr + add rMEMP, rMEMP, rALIGN + b L(medium_tail2) + + .align 5 +/* Clear lines of memory in 128-byte chunks. */ +L(zloopstart): +/* If the remaining length is less the 32 bytes, don't bother getting + the cache line size. */ + beq L(medium) + li rCLS,128 /* cache line size is 128 */ + +/* Now we know the cache line size, and it is not 32-bytes, but + we may not yet be aligned to the cache line. May have a partial + line to fill, so touch it 1st. */ + dcbt 0,rMEMP +L(getCacheAligned): + cmpldi cr1,rLEN,32 + andi. rTMP,rMEMP,127 + blt cr1,L(handletail32) + beq L(cacheAligned) + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + std rCHR,-32(rMEMP) + std rCHR,-24(rMEMP) + std rCHR,-16(rMEMP) + std rCHR,-8(rMEMP) + b L(getCacheAligned) + +/* Now we are aligned to the cache line and can use dcbz. */ +L(cacheAligned): + cmpld cr1,rLEN,rCLS + blt cr1,L(handletail32) + dcbz 0,rMEMP + subf rLEN,rCLS,rLEN + add rMEMP,rMEMP,rCLS + b L(cacheAligned) + +/* We are here because the cache line size was set and was not 32-bytes + and the remainder (rLEN) is less than the actual cache line size. + So set up the preconditions for L(nondcbz) and go there. */ +L(handletail32): + clrrwi. rALIGN, rLEN, 5 + b L(nondcbz) + + .align 5 +L(small): +/* Memset of 8 bytes or less. */ + cmpldi cr6, rLEN, 4 + cmpldi cr5, rLEN, 1 + ble cr6,L(le4) + subi rLEN, rLEN, 4 + stb rCHR,0(rMEMP) + stb rCHR,1(rMEMP) + stb rCHR,2(rMEMP) + stb rCHR,3(rMEMP) + addi rMEMP,rMEMP, 4 + cmpldi cr5, rLEN, 1 +L(le4): + cmpldi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + cmpldi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt- 29, L(medium_29t) +L(medium_29f): + bge- cr1, L(medium_27t) + bflr- 28 + std rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt- cr1, L(medium_27f) +L(medium_27t): + std rCHR, -8(rMEMP) + stdu rCHR, -16(rMEMP) +L(medium_27f): + bflr- 28 +L(medium_28t): + std rCHR, -8(rMEMP) + blr +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. */ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END (__bzero) +#ifndef __bzero +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/multiarch/Implies new file mode 100644 index 0000000000..30edcf7f9d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/strncmp.S new file mode 100644 index 0000000000..2b0c00dfb2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/strncmp.S @@ -0,0 +1,225 @@ +/* Optimized strcmp implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ + +EALIGN (STRNCMP, 4, 0) + CALL_MCOUNT 3 + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r10 +#define rWORD4 r11 +#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 + lis r7F7F, 0x7f7f + dcbt 0,rSTR2 + clrldi. rTMP, rTMP, 61 + cmpldi cr1, rN, 0 + lis rFEFE, -0x101 + bne L(unaligned) +/* We are doubleword aligned so set up for two loops. first a double word + loop, then fall into the byte loop if any residual. */ + srdi. rTMP, rN, 3 + clrldi rN, rN, 61 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + cmpldi cr1, rN, 0 + beq L(unaligned) + + mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */ + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) + sldi rTMP, rFEFE, 32 + insrdi r7F7F, r7F7F, 32, 0 + add rFEFE, rFEFE, rTMP + b L(g1) + +L(g0): + ldu rWORD1, 8(rSTR1) + bne- cr1, L(different) + ldu rWORD2, 8(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + bdz L(tail) + and. rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + beq cr1, L(equal) + andc rTMP2, rTMP2, rTMP + rldimi rTMP2, rTMP2, 1, 0 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + cmpd cr1, rWORD1, rWORD2 + beq cr1, L(equal) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzd rBITDIF, rBITDIF + cntlzd rNEG, rNEG + addi rNEG, rNEG, 7 + cmpd cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + blt- cr1, L(equal) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + bne- L(endstring) + addi rSTR1, rSTR1, 8 + bne- cr1, L(different) + addi rSTR2, rSTR2, 8 + cmpldi cr1, rN, 0 +L(unaligned): + mtctr rN /* Power4 wants mtctr 1st in dispatch group */ + ble cr1, L(ux) +L(uz): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + .align 4 +L(u1): + cmpdi cr1, rWORD1, 0 + bdz L(u4) + cmpd rWORD1, rWORD2 + beq- cr1, L(u4) + bne- L(u4) + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + cmpdi cr1, rWORD3, 0 + bdz L(u3) + cmpd rWORD3, rWORD4 + beq- cr1, L(u3) + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + cmpdi cr1, rWORD1, 0 + bdz L(u4) + cmpd rWORD1, rWORD2 + beq- cr1, L(u4) + bne- L(u4) + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + cmpdi cr1, rWORD3, 0 + bdz L(u3) + cmpd rWORD3, rWORD4 + beq- cr1, L(u3) + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + b L(u1) + +L(u3): sub rRTN, rWORD3, rWORD4 + blr +L(u4): sub rRTN, rWORD1, rWORD2 + blr +L(ux): + li rRTN, 0 + blr +END (STRNCMP) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/Implies new file mode 100644 index 0000000000..565bc94471 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/Implies @@ -0,0 +1,4 @@ +powerpc/power5+/fpu +powerpc/power5+ +powerpc/powerpc64/power5/fpu +powerpc/powerpc64/power5 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/Implies new file mode 100644 index 0000000000..f00c50fb49 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/multiarch/Implies new file mode 100644 index 0000000000..c0e67848e2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S new file mode 100644 index 0000000000..39b7ee78e5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S @@ -0,0 +1,37 @@ +/* ceil function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__ceil, 4, 0) + CALL_MCOUNT 0 + frip fp1, fp1 + blr + END (__ceil) + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +weak_alias (__ceil, ceill) +strong_alias (__ceil, __ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S new file mode 100644 index 0000000000..d1c6f26d6d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S @@ -0,0 +1,30 @@ +/* ceilf function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__ceilf, 4, 0) + CALL_MCOUNT 0 + frip fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set ooverflow for float. */ + blr + END (__ceilf) + +weak_alias (__ceilf, ceilf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S new file mode 100644 index 0000000000..6411f15633 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S @@ -0,0 +1,37 @@ +/* floor function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__floor, 4, 0) + CALL_MCOUNT 0 + frim fp1, fp1 + blr + END (__floor) + +weak_alias (__floor, floor) + +#ifdef NO_LONG_DOUBLE +weak_alias (__floor, floorl) +strong_alias (__floor, __floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S new file mode 100644 index 0000000000..26c3b2594b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S @@ -0,0 +1,30 @@ +/* floorf function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__floorf, 4, 0) + CALL_MCOUNT 0 + frim fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set ooverflow for float. */ + blr + END (__floorf) + +weak_alias (__floorf, floorf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S new file mode 100644 index 0000000000..909714b449 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S @@ -0,0 +1,58 @@ +/* llround function. POWER5+, PowerPC64 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long long [r3] llround (float x [fp1]) + IEEE 1003.1 llround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we pre-round using the V2.02 Floating Round to Integer Nearest + instruction before we use Floating Convert to Integer Word with + round to zero instruction. */ + + .machine "power5" +EALIGN (__llround, 4, 0) + CALL_MCOUNT 0 + frin fp2, fp1 /* Round to nearest +-0.5. */ + fctidz fp3, fp2 /* Convert To Integer DW round toward 0. */ + stfd fp3, -16(r1) + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop + ld r3, -16(r1) + blr + END (__llround) + +strong_alias (__llround, __lround) +weak_alias (__llround, llround) +weak_alias (__lround, lround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S new file mode 100644 index 0000000000..dc46d20f4f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S @@ -0,0 +1,37 @@ +/* round function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__round, 4, 0) + CALL_MCOUNT 0 + frin fp1, fp1 + blr + END (__round) + +weak_alias (__round, round) + +#ifdef NO_LONG_DOUBLE +weak_alias (__round, roundl) +strong_alias (__round, __roundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __round, roundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S new file mode 100644 index 0000000000..0a587843ad --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S @@ -0,0 +1,30 @@ +/* roundf function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__roundf, 4, 0) + CALL_MCOUNT 0 + frin fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set ooverflow for float. */ + blr + END (__roundf) + +weak_alias (__roundf, roundf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S new file mode 100644 index 0000000000..7f8290e408 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S @@ -0,0 +1,37 @@ +/* trunc function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__trunc, 4, 0) + CALL_MCOUNT 0 + friz fp1, fp1 + blr + END (__trunc) + +weak_alias (__trunc, trunc) + +#ifdef NO_LONG_DOUBLE +weak_alias (__trunc, truncl) +strong_alias (__trunc, __truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S new file mode 100644 index 0000000000..07f5d33127 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S @@ -0,0 +1,30 @@ +/* truncf function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__truncf, 4, 0) + CALL_MCOUNT 0 + friz fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set ooverflow for float. */ + blr + END (__truncf) + +weak_alias (__truncf, truncf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/multiarch/Implies new file mode 100644 index 0000000000..0851b19fa2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/Implies new file mode 100644 index 0000000000..bedb20b65c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power4/fpu +powerpc/powerpc64/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/Implies new file mode 100644 index 0000000000..6b8c23efa6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power4/fpu/ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/multiarch/Implies new file mode 100644 index 0000000000..3740d050a6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power4/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S new file mode 100644 index 0000000000..d6a829ea37 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S @@ -0,0 +1,60 @@ +/* isnan(). PowerPC64 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power5 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + stfd fp1,-8(r1) /* copy FPR to GPR */ + lis r0,0x7ff0 + nop /* insure the following is in a different */ + nop /* dispatch group */ + ld r4,-8(r1) + sldi r0,r0,32 /* const long r0 0x7ff00000 00000000 */ + clrldi r4,r4,1 /* x = fabs(x) */ + cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */ + li r3,0 /* then return 0 */ + blelr+ cr7 + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/multiarch/Implies new file mode 100644 index 0000000000..9a3cbb0938 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power4/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/Implies new file mode 100644 index 0000000000..4c782d4122 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power5+/fpu +powerpc/powerpc64/power5+ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/Implies new file mode 100644 index 0000000000..f09854edb6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5+/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/multiarch/Implies new file mode 100644 index 0000000000..fca8a4ef0f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5+/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysign.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysign.S new file mode 100644 index 0000000000..ec36d1be5b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysign.S @@ -0,0 +1,58 @@ +/* copysign(). PowerPC64/POWER6 version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* double [f1] copysign (double [f1] x, double [f2] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + + .section ".text" + .type __copysign, @function + .machine power6 +EALIGN (__copysign, 4, 0) + CALL_MCOUNT 0 + fcpsgn fp1,fp2,fp1 + blr +END (__copysign) + +hidden_def (__copysign) +weak_alias (__copysign, copysign) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__copysign, __copysignf) +hidden_def (__copysignf) +weak_alias (__copysignf, copysignf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__copysign, __copysignl) +weak_alias (__copysign, copysignl) +#endif + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, copysign, copysignl, GLIBC_2_0) +# endif +#else +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, copysign, copysignl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysignf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysignf.S new file mode 100644 index 0000000000..d4aa702d07 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysignf.S @@ -0,0 +1 @@ +/* This function uses the same code as s_copysign.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S new file mode 100644 index 0000000000..85187b45f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S @@ -0,0 +1,59 @@ +/* isnan(). PowerPC64 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power6 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + stfd fp1,-8(r1) /* copy FPR to GPR */ + ori r1,r1,0 + ld r4,-8(r1) + lis r0,0x7ff0 + sldi r0,r0,32 /* const long r0 0x7ff00000 00000000 */ + clrldi r4,r4,1 /* x = fabs(x) */ + cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */ + li r3,0 /* then return 0 */ + blelr+ cr7 + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memcpy.S new file mode 100644 index 0000000000..1f7294b8ed --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memcpy.S @@ -0,0 +1,1499 @@ +/* Optimized memcpy implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + + Memcpy handles short copies (< 32-bytes) using a binary move blocks + (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled + with the appropriate combination of byte and halfword load/stores. + There is minimal effort to optimize the alignment of short moves. + The 64-bit implementations of POWER3 and POWER4 do a reasonable job + of handling unaligned load/stores that do not cross 32-byte boundaries. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination doubleword (8-byte) aligned. Further optimization is + possible when both source and destination are doubleword aligned. + Each case has a optimized unrolled loop. + + For POWER6 unaligned loads will take a 20+ cycle hiccup for any + L1 cache miss that crosses a 32- or 128-byte boundary. Store + is more forgiving and does not take a hiccup until page or + segment boundaries. So we require doubleword alignment for + the source but may take a risk and only require word alignment + for the destination. */ + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + .machine "power6" +EALIGN (MEMCPY, 7, 0) + CALL_MCOUNT 3 + + cmpldi cr1,5,31 + neg 0,3 + std 3,-16(1) + std 31,-8(1) + andi. 11,3,7 /* check alignment of dst. */ + clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ + clrldi 10,4,61 /* check alignment of src. */ + cmpldi cr6,5,8 + ble- cr1,.L2 /* If move < 32 bytes use short move code. */ + mtcrf 0x01,0 + cmpld cr6,10,11 + srdi 9,5,3 /* Number of full double words remaining. */ + beq .L0 + + subf 5,0,5 + /* Move 0-7 bytes as needed to get the destination doubleword aligned. + Duplicate some code to maximize fall-through and minimize agen delays. */ +1: bf 31,2f + lbz 6,0(4) + stb 6,0(3) + bf 30,5f + lhz 6,1(4) + sth 6,1(3) + bf 29,0f + lwz 6,3(4) + stw 6,3(3) + b 0f +5: + bf 29,0f + lwz 6,1(4) + stw 6,1(3) + b 0f + +2: bf 30,4f + lhz 6,0(4) + sth 6,0(3) + bf 29,0f + lwz 6,2(4) + stw 6,2(3) + b 0f + +4: bf 29,0f + lwz 6,0(4) + stw 6,0(3) +0: +/* Add the number of bytes until the 1st doubleword of dst to src and dst. */ + add 4,4,0 + add 3,3,0 + + clrldi 10,4,61 /* check alignment of src again. */ + srdi 9,5,3 /* Number of full double words remaining. */ + + /* Copy doublewords from source to destination, assuming the + destination is aligned on a doubleword boundary. + + At this point we know there are at least 25 bytes left (32-7) to copy. + The next step is to determine if the source is also doubleword aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. + + Otherwise source and destination are doubleword aligned, and we can + the optimized doubleword copy loop. */ + .align 4 +.L0: + clrldi 11,5,61 + andi. 0,5,0x78 + srdi 12,5,7 /* Number of 128-byte blocks to move. */ + cmpldi cr1,11,0 /* If the tail is 0 bytes */ + bne- cr6,.L6 /* If source is not DW aligned. */ + + /* Move doublewords where destination and source are DW aligned. + Use a unrolled loop to copy 16 doublewords (128-bytes) per iteration. + If the copy is not an exact multiple of 128 bytes, 1-15 + doublewords are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-7 bytes. These byte + are copied a word/halfword/byte at a time as needed to preserve + alignment. + + For POWER6 the L1 is store-through and the L2 is store-in. The + L2 is clocked at half CPU clock so we can store 16 bytes every + other cycle. POWER6 also has a load/store bypass so we can do + load, load, store, store every 2 cycles. + + The following code is sensitive to cache line alignment. Do not + make any change with out first making sure they don't result in + splitting ld/std pairs across a cache line. */ + + mtcrf 0x02,5 + mtcrf 0x01,5 + cmpldi cr5,12,1 + beq L(das_loop) + + bf 25,4f + .align 3 + ld 6,0(4) + ld 7,8(4) + mr 11,4 + mr 10,3 + std 6,0(3) + std 7,8(3) + ld 6,16(4) + ld 7,24(4) + std 6,16(3) + std 7,24(3) + ld 6,0+32(4) + ld 7,8+32(4) + addi 4,4,64 + addi 3,3,64 + std 6,0+32(10) + std 7,8+32(10) + ld 6,16+32(11) + ld 7,24+32(11) + std 6,16+32(10) + std 7,24+32(10) +4: + mr 10,3 + bf 26,2f + ld 6,0(4) + ld 7,8(4) + mr 11,4 + nop + std 6,0(3) + std 7,8(3) + ld 6,16(4) + ld 7,24(4) + addi 4,4,32 + std 6,16(3) + std 7,24(3) + addi 3,3,32 +6: + nop + bf 27,5f + ld 6,0+32(11) + ld 7,8+32(11) + addi 4,4,16 + addi 3,3,16 + std 6,0+32(10) + std 7,8+32(10) + bf 28,L(das_loop_s) + ld 0,16+32(11) + addi 4,4,8 + addi 3,3,8 + std 0,16+32(10) + blt cr5,L(das_tail) + b L(das_loop) + .align 3 +5: + nop + bf 28,L(das_loop_s) + ld 6,32(11) + addi 4,4,8 + addi 3,3,8 + std 6,32(10) + blt cr5,L(das_tail) + b L(das_loop) + .align 3 +2: + mr 11,4 + bf 27,1f + ld 6,0(4) + ld 7,8(4) + addi 4,4,16 + addi 3,3,16 + std 6,0(10) + std 7,8(10) + bf 28,L(das_loop_s) + ld 0,16(11) + addi 4,11,24 + addi 3,10,24 + std 0,16(10) + blt cr5,L(das_tail) + b L(das_loop) + .align 3 +1: + nop + bf 28,L(das_loop_s) + ld 6,0(4) + addi 4,4,8 + addi 3,3,8 + std 6,0(10) +L(das_loop_s): + nop + blt cr5,L(das_tail) + .align 4 +L(das_loop): + ld 6,0(4) + ld 7,8(4) + mr 10,3 + mr 11,4 + std 6,0(3) + std 7,8(3) + addi 12,12,-1 + nop + ld 8,16(4) + ld 0,24(4) + std 8,16(3) + std 0,24(3) + + ld 6,0+32(4) + ld 7,8+32(4) + std 6,0+32(3) + std 7,8+32(3) + ld 8,16+32(4) + ld 0,24+32(4) + std 8,16+32(3) + std 0,24+32(3) + + ld 6,0+64(11) + ld 7,8+64(11) + std 6,0+64(10) + std 7,8+64(10) + ld 8,16+64(11) + ld 0,24+64(11) + std 8,16+64(10) + std 0,24+64(10) + + ld 6,0+96(11) + ld 7,8+96(11) + addi 4,4,128 + addi 3,3,128 + std 6,0+96(10) + std 7,8+96(10) + ld 8,16+96(11) + ld 0,24+96(11) + std 8,16+96(10) + std 0,24+96(10) + ble cr5,L(das_loop_e) + + mtctr 12 + .align 4 +L(das_loop2): + ld 6,0(4) + ld 7,8(4) + mr 10,3 + mr 11,4 + std 6,0(3) + std 7,8(3) + ld 8,16(4) + ld 0,24(4) + std 8,16(3) + std 0,24(3) + + ld 6,0+32(4) + ld 7,8+32(4) + std 6,0+32(3) + std 7,8+32(3) + ld 8,16+32(4) + ld 0,24+32(4) + std 8,16+32(3) + std 0,24+32(3) + + ld 6,0+64(11) + ld 7,8+64(11) + std 6,0+64(10) + std 7,8+64(10) + ld 8,16+64(11) + ld 0,24+64(11) + std 8,16+64(10) + std 0,24+64(10) + + ld 6,0+96(11) + ld 7,8+96(11) + addi 4,4,128 + addi 3,3,128 + std 6,0+96(10) + std 7,8+96(10) + ld 8,16+96(11) + ld 0,24+96(11) + std 8,16+96(10) + std 0,24+96(10) + bdnz L(das_loop2) +L(das_loop_e): +/* Check of a 1-7 byte tail, return if none. */ + bne cr1,L(das_tail2) +/* Return original dst pointer. */ + ld 3,-16(1) + blr + .align 4 +L(das_tail): + beq cr1,0f + +L(das_tail2): +/* At this point we have a tail of 0-7 bytes and we know that the + destination is double word aligned. */ +4: bf 29,2f + lwz 6,0(4) + stw 6,0(3) + bf 30,5f + lhz 6,4(4) + sth 6,4(3) + bf 31,0f + lbz 6,6(4) + stb 6,6(3) + b 0f +5: bf 31,0f + lbz 6,4(4) + stb 6,4(3) + b 0f + +2: bf 30,1f + lhz 6,0(4) + sth 6,0(3) + bf 31,0f + lbz 6,2(4) + stb 6,2(3) + b 0f + +1: bf 31,0f + lbz 6,0(4) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 3,-16(1) + blr + +/* Copy up to 31 bytes. This divided into two cases 0-8 bytes and 9-31 + bytes. Each case is handled without loops, using binary (1,2,4,8) + tests. + + In the short (0-8 byte) case no attempt is made to force alignment + of either source or destination. The hardware will handle the + unaligned load/stores with small delays for crossing 32- 128-byte, + and 4096-byte boundaries. Since these short moves are unlikely to be + unaligned or cross these boundaries, the overhead to force + alignment is not justified. + + The longer (9-31 byte) move is more likely to cross 32- or 128-byte + boundaries. Since only loads are sensitive to the 32-/128-byte + boundaries it is more important to align the source then the + destination. If the source is not already word aligned, we first + move 1-3 bytes as needed. Since we are only word aligned we don't + use double word load/stores to insure that all loads are aligned. + While the destination and stores may still be unaligned, this + is only an issue for page (4096 byte boundary) crossing, which + should be rare for these short moves. The hardware handles this + case automatically with a small (~20 cycle) delay. */ + .align 4 +.L2: + mtcrf 0x01,5 + neg 8,4 + clrrdi 11,4,2 + andi. 0,8,3 + ble cr6,.LE8 /* Handle moves of 0-8 bytes. */ +/* At least 9 bytes left. Get the source word aligned. */ + cmpldi cr1,5,16 + mr 10,5 + mr 12,4 + cmpldi cr6,0,2 + beq L(dus_tail) /* If the source is already word aligned skip this. */ +/* Copy 1-3 bytes to get source address word aligned. */ + lwz 6,0(11) + subf 10,0,5 + add 12,4,0 + blt cr6,5f + srdi 7,6,16 + bgt cr6,3f +#ifdef __LITTLE_ENDIAN__ + sth 7,0(3) +#else + sth 6,0(3) +#endif + b 7f + .align 4 +3: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,24 + stb 6,0(3) + sth 7,1(3) +#else + stb 7,0(3) + sth 6,1(3) +#endif + b 7f + .align 4 +5: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,8 +#endif + stb 6,0(3) +7: + cmpldi cr1,10,16 + add 3,3,0 + mtcrf 0x01,10 + .align 4 +L(dus_tail): +/* At least 6 bytes left and the source is word aligned. This allows + some speculative loads up front. */ +/* We need to special case the fall-through because the biggest delays + are due to address computation not being ready in time for the + AGEN. */ + lwz 6,0(12) + lwz 7,4(12) + blt cr1,L(dus_tail8) + cmpldi cr0,10,24 +L(dus_tail16): /* Move 16 bytes. */ + stw 6,0(3) + stw 7,4(3) + lwz 6,8(12) + lwz 7,12(12) + stw 6,8(3) + stw 7,12(3) +/* Move 8 bytes more. */ + bf 28,L(dus_tail16p8) + cmpldi cr1,10,28 + lwz 6,16(12) + lwz 7,20(12) + stw 6,16(3) + stw 7,20(3) +/* Move 4 bytes more. */ + bf 29,L(dus_tail16p4) + lwz 6,24(12) + stw 6,24(3) + addi 12,12,28 + addi 3,3,28 + bgt cr1,L(dus_tail2) + /* exactly 28 bytes. Return original dst pointer and exit. */ + ld 3,-16(1) + blr + .align 4 +L(dus_tail16p8): /* less than 8 bytes left. */ + beq cr1,L(dus_tailX) /* exactly 16 bytes, early exit. */ + cmpldi cr1,10,20 + bf 29,L(dus_tail16p2) +/* Move 4 bytes more. */ + lwz 6,16(12) + stw 6,16(3) + addi 12,12,20 + addi 3,3,20 + bgt cr1,L(dus_tail2) + /* exactly 20 bytes. Return original dst pointer and exit. */ + ld 3,-16(1) + blr + .align 4 +L(dus_tail16p4): /* less than 4 bytes left. */ + addi 12,12,24 + addi 3,3,24 + bgt cr0,L(dus_tail2) + /* exactly 24 bytes. Return original dst pointer and exit. */ + ld 3,-16(1) + blr + .align 4 +L(dus_tail16p2): /* 16 bytes moved, less than 4 bytes left. */ + addi 12,12,16 + addi 3,3,16 + b L(dus_tail2) + + .align 4 +L(dus_tail8): /* Move 8 bytes. */ +/* r6, r7 already loaded speculatively. */ + cmpldi cr1,10,8 + cmpldi cr0,10,12 + bf 28,L(dus_tail4) + .align 2 + stw 6,0(3) + stw 7,4(3) +/* Move 4 bytes more. */ + bf 29,L(dus_tail8p4) + lwz 6,8(12) + stw 6,8(3) + addi 12,12,12 + addi 3,3,12 + bgt cr0,L(dus_tail2) + /* exactly 12 bytes. Return original dst pointer and exit. */ + ld 3,-16(1) + blr + .align 4 +L(dus_tail8p4): /* less than 4 bytes left. */ + addi 12,12,8 + addi 3,3,8 + bgt cr1,L(dus_tail2) + /* exactly 8 bytes. Return original dst pointer and exit. */ + ld 3,-16(1) + blr + + .align 4 +L(dus_tail4): /* Move 4 bytes. */ +/* r6 already loaded speculatively. If we are here we know there is + more than 4 bytes left. So there is no need to test. */ + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +L(dus_tail2): /* Move 2-3 bytes. */ + bf 30,L(dus_tail1) + lhz 6,0(12) + sth 6,0(3) + bf 31,L(dus_tailX) + lbz 7,2(12) + stb 7,2(3) + ld 3,-16(1) + blr +L(dus_tail1): /* Move 1 byte. */ + bf 31,L(dus_tailX) + lbz 6,0(12) + stb 6,0(3) +L(dus_tailX): + /* Return original dst pointer. */ + ld 3,-16(1) + blr + +/* Special case to copy 0-8 bytes. */ + .align 4 +.LE8: + mr 12,4 + bne cr6,L(dus_4) +/* Exactly 8 bytes. We may cross a 32-/128-byte boundary and take a ~20 + cycle delay. This case should be rare and any attempt to avoid this + would take most of 20 cycles any way. */ + ld 6,0(4) + std 6,0(3) + /* Return original dst pointer. */ + ld 3,-16(1) + blr + .align 4 +L(dus_4): + bf 29,L(dus_tail2) + lwz 6,0(4) + stw 6,0(3) + bf 30,L(dus_5) + lhz 7,4(4) + sth 7,4(3) + bf 31,L(dus_0) + lbz 8,6(4) + stb 8,6(3) + ld 3,-16(1) + blr + .align 4 +L(dus_5): + bf 31,L(dus_0) + lbz 6,4(4) + stb 6,4(3) +L(dus_0): + /* Return original dst pointer. */ + ld 3,-16(1) + blr + + .align 4 +.L6: + cfi_offset(31,-8) + mr 12,4 + mr 31,5 + /* Copy doublewords where the destination is aligned but the source is + not. Use aligned doubleword loads from the source, shifted to realign + the data, to allow aligned destination stores. */ + addi 11,9,-1 /* loop DW count is one less than total */ + subf 5,10,12 /* Move source addr to previous full double word. */ + cmpldi cr5, 10, 2 + cmpldi cr0, 10, 4 + mr 4,3 + srdi 8,11,2 /* calculate the 32 byte loop count */ + ld 6,0(5) /* pre load 1st full doubleword. */ + mtcrf 0x01,11 + cmpldi cr6,9,4 + mtctr 8 + ld 7,8(5) /* pre load 2nd full doubleword. */ + bge cr0, L(du4_do) + blt cr5, L(du1_do) + beq cr5, L(du2_do) + b L(du3_do) + + .align 4 +L(du1_do): + bf 30,L(du1_1dw) + + /* there are at least two DWs to copy */ + /* FIXME: can combine last shift and "or" into "rldimi" */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 8 + sldi 8,6, 64-8 +#else + sldi 0,7, 8 + srdi 8,6, 64-8 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du1_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du1_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du1_fini) /* if total DWs = 4, then bypass loop */ + b L(du1_loop) + .align 4 +L(du1_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du1_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du1_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 8 + sldi 8,6, 64-8 +#else + sldi 0,7, 8 + srdi 8,6, 64-8 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 8 + sldi 8,6, 64-8 +#else + sldi 0,7, 8 + srdi 8,6, 64-8 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du1_loop) + .align 4 +L(du1_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du2_do): + bf 30,L(du2_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 16 + sldi 8,6, 64-16 +#else + sldi 0,7, 16 + srdi 8,6, 64-16 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du2_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du2_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du2_fini) /* if total DWs = 4, then bypass loop */ + b L(du2_loop) + .align 4 +L(du2_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du2_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du2_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 16 + sldi 8,6, 64-16 +#else + sldi 0,7, 16 + srdi 8,6, 64-16 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 16 + sldi 8,6, 64-16 +#else + sldi 0,7, 16 + srdi 8,6, 64-16 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du2_loop) + .align 4 +L(du2_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du3_do): + bf 30,L(du3_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 24 + sldi 8,6, 64-24 +#else + sldi 0,7, 24 + srdi 8,6, 64-24 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du3_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du3_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du3_fini) /* if total DWs = 4, then bypass loop */ + b L(du3_loop) + .align 4 +L(du3_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du3_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du3_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 24 + sldi 8,6, 64-24 +#else + sldi 0,7, 24 + srdi 8,6, 64-24 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 24 + sldi 8,6, 64-24 +#else + sldi 0,7, 24 + srdi 8,6, 64-24 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du3_loop) + .align 4 +L(du3_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du4_do): + cmpldi cr5, 10, 6 + beq cr0, L(du4_dox) + blt cr5, L(du5_do) + beq cr5, L(du6_do) + b L(du7_do) +L(du4_dox): + bf 30,L(du4_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 32 + sldi 8,6, 64-32 +#else + sldi 0,7, 32 + srdi 8,6, 64-32 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du4_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du4_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du4_fini) /* if total DWs = 4, then bypass loop */ + b L(du4_loop) + .align 4 +L(du4_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du4_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du4_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 32 + sldi 8,6, 64-32 +#else + sldi 0,7, 32 + srdi 8,6, 64-32 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 32 + sldi 8,6, 64-32 +#else + sldi 0,7, 32 + srdi 8,6, 64-32 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du4_loop) + .align 4 +L(du4_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du5_do): + bf 30,L(du5_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 40 + sldi 8,6, 64-40 +#else + sldi 0,7, 40 + srdi 8,6, 64-40 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du5_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du5_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du5_fini) /* if total DWs = 4, then bypass loop */ + b L(du5_loop) + .align 4 +L(du5_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du5_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du5_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 40 + sldi 8,6, 64-40 +#else + sldi 0,7, 40 + srdi 8,6, 64-40 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 40 + sldi 8,6, 64-40 +#else + sldi 0,7, 40 + srdi 8,6, 64-40 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du5_loop) + .align 4 +L(du5_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du6_do): + bf 30,L(du6_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 48 + sldi 8,6, 64-48 +#else + sldi 0,7, 48 + srdi 8,6, 64-48 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du6_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du6_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du6_fini) /* if total DWs = 4, then bypass loop */ + b L(du6_loop) + .align 4 +L(du6_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du6_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du6_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 48 + sldi 8,6, 64-48 +#else + sldi 0,7, 48 + srdi 8,6, 64-48 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 48 + sldi 8,6, 64-48 +#else + sldi 0,7, 48 + srdi 8,6, 64-48 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du6_loop) + .align 4 +L(du6_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du7_do): + bf 30,L(du7_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 56 + sldi 8,6, 64-56 +#else + sldi 0,7, 56 + srdi 8,6, 64-56 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du7_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du7_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du7_fini) /* if total DWs = 4, then bypass loop */ + b L(du7_loop) + .align 4 +L(du7_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du7_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du7_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 56 + sldi 8,6, 64-56 +#else + sldi 0,7, 56 + srdi 8,6, 64-56 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 56 + sldi 8,6, 64-56 +#else + sldi 0,7, 56 + srdi 8,6, 64-56 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du7_loop) + .align 4 +L(du7_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du_done): + rldicr 0,31,0,60 + mtcrf 0x01,31 + beq cr1,0f /* If the tail is 0 bytes we are done! */ + + add 3,3,0 + add 12,12,0 +/* At this point we have a tail of 0-7 bytes and we know that the + destination is double word aligned. */ +4: bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: bf 30,1f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 31,-8(1) + ld 3,-16(1) + blr +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memset.S new file mode 100644 index 0000000000..aee1c8eabb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memset.S @@ -0,0 +1,395 @@ +/* Optimized 64-bit memset implementation for POWER6. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. + + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (256 bits). There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + .machine power6 +EALIGN (MEMSET, 7, 0) + CALL_MCOUNT 3 + +#define rTMP r0 +#define rRTN r3 /* Initial value of 1st argument. */ +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ +#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ +#define rMEMP2 r8 +#define rMEMP3 r9 /* Alt mem pointer. */ +L(_memset): +/* Take care of case for size <= 4. */ + cmpldi cr1, rLEN, 8 + andi. rALIGN, rMEMP0, 7 + mr rMEMP, rMEMP0 + ble cr1, L(small) + +/* Align to doubleword boundary. */ + cmpldi cr5, rLEN, 31 + insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + beq+ L(aligned2) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 8 + cror 28,30,31 /* Detect odd word aligned. */ + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + bt 29, L(g4) +/* Process the even word of doubleword. */ + bf+ 31, L(g2) + stb rCHR, 0(rMEMP0) + bt 30, L(g4x) +L(g2): + sth rCHR, -6(rMEMP) +L(g4x): + stw rCHR, -4(rMEMP) + b L(aligned) +/* Process the odd word of doubleword. */ +L(g4): + bf 28, L(g4x) /* If false, word aligned on odd word. */ + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): + sth rCHR, -2(rMEMP) + +/* Handle the case of size < 31. */ +L(aligned2): + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ +L(aligned): + mtcrf 0x01, rLEN + ble cr5, L(medium) +/* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x18 + subfic rALIGN, rALIGN, 0x20 + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stdu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + std rCHR, -8(rMEMP2) + stdu rCHR, -16(rMEMP2) +L(a2): + +/* Now aligned to a 32 byte boundary. */ + .align 4 +L(caligned): + cmpldi cr1, rCHR, 0 + clrrdi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN + beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */ + beq L(medium) /* We may not actually get to do a full line. */ + .align 4 +/* Storing a non-zero "c" value. We are aligned at a sector (32-byte) + boundary may not be at cache line (128-byte) boundary. */ +L(nzloopstart): +/* memset in 32-byte chunks until we get to a cache line boundary. + If rLEN is less than the distance to the next cache-line boundary use + cacheAligned1 code to finish the tail. */ + cmpldi cr1,rLEN,128 + + andi. rTMP,rMEMP,127 + blt cr1,L(cacheAligned1) + addi rMEMP3,rMEMP,32 + beq L(nzCacheAligned) + addi rLEN,rLEN,-32 + std rCHR,0(rMEMP) + std rCHR,8(rMEMP) + std rCHR,16(rMEMP) + addi rMEMP,rMEMP,32 + andi. rTMP,rMEMP3,127 + std rCHR,-8(rMEMP3) + + beq L(nzCacheAligned) + addi rLEN,rLEN,-32 + std rCHR,0(rMEMP3) + addi rMEMP,rMEMP,32 + std rCHR,8(rMEMP3) + andi. rTMP,rMEMP,127 + std rCHR,16(rMEMP3) + std rCHR,24(rMEMP3) + + beq L(nzCacheAligned) + addi rLEN,rLEN,-32 + std rCHR,32(rMEMP3) + addi rMEMP,rMEMP,32 + cmpldi cr1,rLEN,128 + std rCHR,40(rMEMP3) + cmpldi cr6,rLEN,256 + li rMEMP2,128 + std rCHR,48(rMEMP3) + std rCHR,56(rMEMP3) + blt cr1,L(cacheAligned1) + b L(nzCacheAligned128) + +/* Now we are aligned to the cache line and can use dcbtst. */ + .align 4 +L(nzCacheAligned): + cmpldi cr1,rLEN,128 + blt cr1,L(cacheAligned1) + b L(nzCacheAligned128) + .align 5 +L(nzCacheAligned128): + cmpldi cr1,rLEN,256 + addi rMEMP3,rMEMP,64 + std rCHR,0(rMEMP) + std rCHR,8(rMEMP) + std rCHR,16(rMEMP) + std rCHR,24(rMEMP) + std rCHR,32(rMEMP) + std rCHR,40(rMEMP) + std rCHR,48(rMEMP) + std rCHR,56(rMEMP) + addi rMEMP,rMEMP3,64 + addi rLEN,rLEN,-128 + std rCHR,0(rMEMP3) + std rCHR,8(rMEMP3) + std rCHR,16(rMEMP3) + std rCHR,24(rMEMP3) + std rCHR,32(rMEMP3) + std rCHR,40(rMEMP3) + std rCHR,48(rMEMP3) + std rCHR,56(rMEMP3) + bge cr1,L(nzCacheAligned128) + dcbtst 0,rMEMP + b L(cacheAligned1) + .align 5 +/* Storing a zero "c" value. We are aligned at a sector (32-byte) + boundary but may not be at cache line (128-byte) boundary. If the + remaining length spans a full cache line we can use the Data cache + block zero instruction. */ +L(zloopstart): +/* memset in 32-byte chunks until we get to a cache line boundary. + If rLEN is less than the distance to the next cache-line boundary use + cacheAligned1 code to finish the tail. */ + cmpldi cr1,rLEN,128 + beq L(medium) +L(getCacheAligned): + andi. rTMP,rMEMP,127 + nop + blt cr1,L(cacheAligned1) + addi rMEMP3,rMEMP,32 + beq L(cacheAligned) + addi rLEN,rLEN,-32 + std rCHR,0(rMEMP) + std rCHR,8(rMEMP) + std rCHR,16(rMEMP) + addi rMEMP,rMEMP,32 + andi. rTMP,rMEMP3,127 + std rCHR,-8(rMEMP3) +L(getCacheAligned2): + beq L(cacheAligned) + addi rLEN,rLEN,-32 + std rCHR,0(rMEMP3) + std rCHR,8(rMEMP3) + addi rMEMP,rMEMP,32 + andi. rTMP,rMEMP,127 + std rCHR,16(rMEMP3) + std rCHR,24(rMEMP3) +L(getCacheAligned3): + beq L(cacheAligned) + addi rLEN,rLEN,-32 + std rCHR,32(rMEMP3) + addi rMEMP,rMEMP,32 + cmpldi cr1,rLEN,128 + std rCHR,40(rMEMP3) + cmpldi cr6,rLEN,256 + li rMEMP2,128 + std rCHR,48(rMEMP3) + std rCHR,56(rMEMP3) + blt cr1,L(cacheAligned1) + blt cr6,L(cacheAligned128) + b L(cacheAlignedx) + +/* Now we are aligned to the cache line and can use dcbz. */ + .align 5 +L(cacheAligned): + cmpldi cr1,rLEN,128 + cmpldi cr6,rLEN,256 + blt cr1,L(cacheAligned1) + li rMEMP2,128 +L(cacheAlignedx): + cmpldi cr5,rLEN,640 + blt cr6,L(cacheAligned128) + bgt cr5,L(cacheAligned512) + cmpldi cr6,rLEN,512 + dcbz 0,rMEMP + cmpldi cr1,rLEN,384 + dcbz rMEMP2,rMEMP + addi rMEMP,rMEMP,256 + addi rLEN,rLEN,-256 + blt cr1,L(cacheAligned1) + blt cr6,L(cacheAligned128) + b L(cacheAligned256) + .align 5 +/* A simple loop for the longer (>640 bytes) lengths. This form limits + the branch miss-predicted to exactly 1 at loop exit.*/ +L(cacheAligned512): + cmpldi cr1,rLEN,128 + blt cr1,L(cacheAligned1) + dcbz 0,rMEMP + addi rLEN,rLEN,-128 + addi rMEMP,rMEMP,128 + b L(cacheAligned512) + .align 5 +L(cacheAligned256): + + cmpldi cr6,rLEN,512 + + dcbz 0,rMEMP + cmpldi cr1,rLEN,384 + dcbz rMEMP2,rMEMP + addi rMEMP,rMEMP,256 + addi rLEN,rLEN,-256 + + bge cr6,L(cacheAligned256) + + blt cr1,L(cacheAligned1) + .align 4 +L(cacheAligned128): + dcbz 0,rMEMP + addi rMEMP,rMEMP,128 + addi rLEN,rLEN,-128 + nop +L(cacheAligned1): + cmpldi cr1,rLEN,32 + blt cr1,L(handletail32) + addi rMEMP3,rMEMP,32 + addi rLEN,rLEN,-32 + std rCHR,0(rMEMP) + std rCHR,8(rMEMP) + std rCHR,16(rMEMP) + addi rMEMP,rMEMP,32 + cmpldi cr1,rLEN,32 + std rCHR,-8(rMEMP3) +L(cacheAligned2): + blt cr1,L(handletail32) + addi rLEN,rLEN,-32 + std rCHR,0(rMEMP3) + std rCHR,8(rMEMP3) + addi rMEMP,rMEMP,32 + cmpldi cr1,rLEN,32 + std rCHR,16(rMEMP3) + std rCHR,24(rMEMP3) + nop +L(cacheAligned3): + blt cr1,L(handletail32) + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + std rCHR,32(rMEMP3) + std rCHR,40(rMEMP3) + std rCHR,48(rMEMP3) + std rCHR,56(rMEMP3) + +/* We are here because the length or remainder (rLEN) is less than the + cache line/sector size and does not justify aggressive loop unrolling. + So set up the preconditions for L(medium) and go there. */ + .align 3 +L(handletail32): + cmpldi cr1,rLEN,0 + beqlr cr1 + b L(medium) + + .align 5 +L(small): +/* Memset of 8 bytes or less. */ + cmpldi cr6, rLEN, 4 + cmpldi cr5, rLEN, 1 + ble cr6,L(le4) + subi rLEN, rLEN, 4 + stb rCHR,0(rMEMP) + stb rCHR,1(rMEMP) + stb rCHR,2(rMEMP) + stb rCHR,3(rMEMP) + addi rMEMP,rMEMP, 4 + cmpldi cr5, rLEN, 1 +L(le4): + cmpldi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + cmpldi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt 29, L(medium_29t) +L(medium_29f): + bge cr1, L(medium_27t) + bflr 28 + std rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt cr1, L(medium_27f) +L(medium_27t): + std rCHR, -8(rMEMP) + stdu rCHR, -16(rMEMP) +L(medium_27f): + bflr 28 +L(medium_28t): + std rCHR, -8(rMEMP) + blr +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. */ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END (__bzero) +#ifndef __bzero +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/multiarch/Implies new file mode 100644 index 0000000000..2ebe304fa6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5+/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcschr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcschr.c new file mode 100644 index 0000000000..ae04a130cc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcschr.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power6/wcschr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcscpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcscpy.c new file mode 100644 index 0000000000..722c8f995b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcscpy.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power6/wcscpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcsrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcsrchr.c new file mode 100644 index 0000000000..b86472d7bd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcsrchr.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power6/wcsrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/Implies new file mode 100644 index 0000000000..9d68f39d22 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power6/fpu +powerpc/powerpc64/power6 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/Implies new file mode 100644 index 0000000000..30fa17646e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies new file mode 100644 index 0000000000..410d289a6d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S new file mode 100644 index 0000000000..b6e11ba0c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S @@ -0,0 +1,58 @@ +/* isnan(). PowerPC64 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power6 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + mftgpr r4,fp1 /* copy FPR to GPR */ + lis r0,0x7ff0 + ori r1,r1,0 + clrldi r4,r4,1 /* x = fabs(x) */ + sldi r0,r0,32 /* const long r0 0x7ff00000 00000000 */ + cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */ + li r3,0 /* then return 0 */ + blelr+ cr7 + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S new file mode 100644 index 0000000000..37aa69061c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S @@ -0,0 +1,44 @@ +/* Round double to long int. POWER6x PowerPC64 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power6" +/* long long int[r3] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT 0 + fctid fp13,fp1 + mftgpr r3,fp13 + blr + END (__llrint) + +strong_alias (__llrint, __lrint) +weak_alias (__llrint, llrint) +weak_alias (__lrint, lrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S new file mode 100644 index 0000000000..62e1798785 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S @@ -0,0 +1,54 @@ +/* llround function. POWER6x PowerPC64 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long long [r3] llround (float x [fp1]) + IEEE 1003.1 llround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we pre-round using the V2.02 Floating Round to Integer Nearest + instruction before we use Floating Convert to Integer Word with + round to zero instruction. */ + + .machine "power6" +ENTRY (__llround) + CALL_MCOUNT 0 + frin fp2,fp1 /* Round to nearest +-0.5. */ + fctidz fp3,fp2 /* Convert To Integer DW round toward 0. */ + mftgpr r3,fp3 /* Transfer integer to R3. */ + blr + END (__llround) + +strong_alias (__llround, __lround) +weak_alias (__llround, llround) +weak_alias (__lround, lround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/multiarch/Implies new file mode 100644 index 0000000000..bf5d6171a5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Implies new file mode 100644 index 0000000000..9d68f39d22 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power6/fpu +powerpc/powerpc64/power6 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Makefile new file mode 100644 index 0000000000..89a2296085 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Makefile @@ -0,0 +1,11 @@ +ifeq ($(subdir),elf) +# Prevent the use of VSX registers and insns in _dl_start, which under -O3 +# optimization may require a TOC reference before relocations are resolved. +CFLAGS-rtld.c += -mno-vsx +endif + +ifeq ($(subdir),string) +sysdep_routines += strstr-ppc64 +CFLAGS-strncase.c += -funroll-loops +CFLAGS-strncase_l.c += -funroll-loops +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/add_n.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/add_n.S new file mode 100644 index 0000000000..6425afbc9f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/add_n.S @@ -0,0 +1,98 @@ +/* PowerPC64 mpn_lshift -- mpn_add_n/mpn_sub_n -- mpn addition and + subtraction. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* cycles/limb + * POWER7 2.18 + */ + +#ifdef USE_AS_SUB +# define FUNC __mpn_sub_n +# define ADDSUBC subfe +#else +# define FUNC __mpn_add_n +# define ADDSUBC adde +#endif + +#define RP r3 +#define UP r4 +#define VP r5 +#define N r6 + +EALIGN(FUNC, 5, 0) +#ifdef USE_AS_SUB + addic r0, r0, 0 +#else + addic r0, r1, -1 +#endif + andi. r7, N, 1 + beq L(bx0) + + ld r7, 0(UP) + ld r9, r0(VP) + ADDSUBC r11, r9, r7 + std r11, r0(RP) + cmpldi N, N, 1 + beq N, L(end) + addi UP, UP, 8 + addi VP, VP, 8 + addi RP, RP, 8 + +L(bx0): addi r0, N, 2 + srdi r0, r0, 2 + mtctr r0 + + andi. r7, N, 2 + bne L(mid) + + addi UP, UP, 16 + addi VP, VP, 16 + addi RP, RP, 16 + + .align 5 +L(top): ld r6, -16(UP) + ld r7, -8(UP) + ld r8, -16(VP) + ld r9, -8(VP) + ADDSUBC r10, r8, N + ADDSUBC r11, r9, r7 + std r10, -16(RP) + std r11, -8(RP) +L(mid): ld r6, 0(UP) + ld r7, 8(UP) + ld r8, 0(VP) + ld r9, 8(VP) + ADDSUBC r10, r8, N + ADDSUBC r11, r9, r7 + std r10, 0(RP) + std r11, 8(RP) + addi UP, UP, 32 + addi VP, VP, 32 + addi RP, RP, 32 + bdnz L(top) + +L(end): subfe r3, r0, r0 +#ifdef USE_AS_SUB + neg r3, r3 +#else + addi r3, r3, 1 +#endif + blr +END(FUNC) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/bcopy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/bcopy.c new file mode 100644 index 0000000000..4a6a400e7a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/bcopy.c @@ -0,0 +1 @@ +/* Implemented at memmove.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/Implies new file mode 100644 index 0000000000..30fa17646e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies new file mode 100644 index 0000000000..410d289a6d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S new file mode 100644 index 0000000000..9ccc758c9e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S @@ -0,0 +1,70 @@ +/* finite(). PowerPC64/POWER7 version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __finite(x) */ + .section ".toc","aw" +.LC0: /* 1.0 */ + .tc FD_ONE[TC],0x3ff0000000000000 + .section ".text" + .type __finite, @function + .machine power7 +EALIGN (__finite, 4, 0) + CALL_MCOUNT 0 + lfd fp0,.LC0@toc(r2) + ftdiv cr7,fp1,fp0 + li r3,1 + bflr 30 + + /* If we are here, we either have +/-INF, + NaN or denormal. */ + + stfd fp1,-16(r1) /* Transfer FP to GPR's. */ + ori 2,2,0 /* Force a new dispatch group. */ + lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value + (biased exponent and sign bit). */ + clrlwi r4,r4,17 /* r4 = abs(r4). */ + cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */ + bltlr cr7 /* LT means finite, other non-finite. */ + li r3,0 + blr + END (__finite) + +hidden_def (__finite) +weak_alias (__finite, finite) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__finite, __finitef) +hidden_def (__finitef) +weak_alias (__finitef, finitef) + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, __finite, __finitel, GLIBC_2_0) +compat_symbol (libm, finite, finitel, GLIBC_2_0) +# endif +#else +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __finite, __finitel, GLIBC_2_0); +compat_symbol (libc, finite, finitel, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S new file mode 100644 index 0000000000..54bd94176d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S @@ -0,0 +1 @@ +/* This function uses the same code as s_finite.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S new file mode 100644 index 0000000000..4482cddcfa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S @@ -0,0 +1,69 @@ +/* isinf(). PowerPC64/POWER7 version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isinf(x) */ + .section ".toc","aw" +.LC0: /* 1.0 */ + .tc FD_ONE[TC],0x3ff0000000000000 + .section ".text" + .type __isinf, @function + .machine power7 +EALIGN (__isinf, 4, 0) + CALL_MCOUNT 0 + lfd fp0,.LC0@toc(r2) + ftdiv cr7,fp1,fp0 + li r3,0 + bflr 29 /* If not INF, return. */ + + /* Either we have -INF/+INF or a denormal. */ + + stfd fp1,-16(r1) /* Transfer FP to GPR's. */ + ori 2,2,0 /* Force a new dispatch group. */ + lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value + (biased exponent and sign bit). */ + cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */ + li r3,1 + beqlr cr7 /* EQ means INF, otherwise -INF. */ + li r3,-1 + blr + END (__isinf) + +hidden_def (__isinf) +weak_alias (__isinf, isinf) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isinf, __isinff) +hidden_def (__isinff) +weak_alias (__isinff, isinff) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isinf, __isinfl) +weak_alias (__isinf, isinfl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0); +compat_symbol (libc, isinf, isinfl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S new file mode 100644 index 0000000000..be759e091e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isinf.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S new file mode 100644 index 0000000000..46b08a0d37 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S @@ -0,0 +1,68 @@ +/* isnan(). PowerPC64/POWER7 version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .section ".toc","aw" +.LC0: /* 1.0 */ + .tc FD_ONE[TC],0x3ff0000000000000 + .section ".text" + .type __isnan, @function + .machine power7 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + lfd fp0,.LC0@toc(r2) + ftdiv cr7,fp1,fp0 + li r3,0 + bflr 30 /* If not NaN, finish. */ + + stfd fp1,-16(r1) /* Transfer FP to GPR's. */ + ori 2,2,0 /* Force a new dispatch group. */ + ld r4,-16(r1) /* Load FP into GPR. */ + lis r0,0x7ff0 + sldi r0,r0,32 /* const long r0 0x7ff00000 00000000. */ + clrldi r4,r4,1 /* x = fabs(x) */ + cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */ + blelr cr7 /* LE means not NaN. */ + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S new file mode 100644 index 0000000000..b48c85e0d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isnan.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c new file mode 100644 index 0000000000..2599c771d9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power7/fpu/s_logb.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c new file mode 100644 index 0000000000..7a5a8032e0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power7/fpu/s_logbf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c new file mode 100644 index 0000000000..524ae2c78d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power7/fpu/s_logbl.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memchr.S new file mode 100644 index 0000000000..5e9707aa02 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memchr.S @@ -0,0 +1,199 @@ +/* Optimized memchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memchr (char *s [r3], int byte [r4], int size [r5]) */ + +#ifndef MEMCHR +# define MEMCHR __memchr +#endif + .machine power7 +ENTRY (MEMCHR) + CALL_MCOUNT 3 + dcbt 0,r3 + clrrdi r8,r3,3 + insrdi r4,r4,8,48 + + /* Calculate the last acceptable address and check for possible + addition overflow by using satured math: + r7 = r3 + r5 + r7 |= -(r7 < x) */ + add r7,r3,r5 + subfc r6,r3,r7 + subfe r9,r9,r9 + extsw r6,r9 + or r7,r7,r6 + + insrdi r4,r4,16,32 + cmpldi r5,32 + li r9, -1 + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + insrdi r4,r4,32,0 + addi r7,r7,-1 +#ifdef __LITTLE_ENDIAN__ + sld r9,r9,r6 +#else + srd r9,r9,r6 +#endif + ble L(small_range) + + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r3,r12,r4 /* Check for BYTEs in DWORD1. */ + and r3,r3,r9 + clrldi r5,r7,61 /* Byte count - 1 in last dword. */ + clrrdi r7,r7,3 /* Address of last doubleword. */ + cmpldi cr7,r3,0 /* Does r3 indicate we got a hit? */ + bne cr7,L(done) + + mtcrf 0x01,r8 + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + bt 28,L(loop_setup) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr7,r3,0 + bne cr7,L(done) + +L(loop_setup): + /* The last dword we want to read in the loop below is the one + containing the last byte of the string, ie. the dword at + (s + size - 1) & ~7, or r7. The first dword read is at + r8 + 8, we read 2 * cnt dwords, so the last dword read will + be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives + cnt = (r7 - r8) / 16 */ + sub r6,r7,r8 + srdi r6,r6,4 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. */ + + /* Main loop to look for BYTE in the string. Since + it's a small loop (8 instructions), align it to 32-bytes. */ + .align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r3,r12,r4 + cmpb r9,r11,r4 + or r6,r9,r3 /* Merge everything in one doubleword. */ + cmpldi cr7,r6,0 + bne cr7,L(found) + bdnz L(loop) + + /* We may have one more dword to read. */ + cmpld r8,r7 + beqlr + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + bne cr6,L(done) + blr + + .align 4 +L(found): + /* OK, one (or both) of the doublewords contains BYTE. Check + the first doubleword and decrement the address in case the first + doubleword really contains BYTE. */ + cmpldi cr6,r3,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* BYTE must be in the second doubleword. Adjust the address + again and move the result of cmpb to r3 so we can calculate the + pointer. */ + + mr r3,r9 + addi r8,r8,8 + + /* r3 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as BYTE in the original + doubleword from the string. Use that to calculate the pointer. + We need to make sure BYTE is *before* the end of the range. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r3,-1 + andc r0,r0,r3 + popcntd r0,r0 /* Count trailing zeros. */ +#else + cntlzd r0,r3 /* Count leading zeros before the match. */ +#endif + cmpld r8,r7 /* Are we on the last dword? */ + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r8,r0 + cmpld cr7,r0,r5 /* If on the last dword, check byte offset. */ + bnelr + blelr cr7 + li r3,0 + blr + + .align 4 +L(null): + li r3,0 + blr + +/* Deals with size <= 32. */ + .align 4 +L(small_range): + cmpldi r5,0 + beq L(null) + ld r12,0(r8) /* Load word from memory. */ + cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ + and r3,r3,r9 + cmpldi cr7,r3,0 + clrldi r5,r7,61 /* Byte count - 1 in last dword. */ + clrrdi r7,r7,3 /* Address of last doubleword. */ + cmpld r8,r7 /* Are we done already? */ + bne cr7,L(done) + beqlr + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + cmpld r8,r7 + bne cr6,L(done) /* Found something. */ + beqlr /* Hit end of string (length). */ + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + cmpld r8,r7 + bne cr6,L(done) + beqlr + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + cmpld r8,r7 + bne cr6,L(done) + beqlr + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + bne cr6,L(done) + blr + +END (MEMCHR) +weak_alias (__memchr, memchr) +libc_hidden_builtin_def (memchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcmp.S new file mode 100644 index 0000000000..96ce8cee25 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcmp.S @@ -0,0 +1,1061 @@ +/* Optimized memcmp implementation for POWER7/PowerPC64. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ +#ifndef MEMCMP +# define MEMCMP memcmp +#endif + .machine power7 +EALIGN (MEMCMP, 4, 0) + CALL_MCOUNT 3 + +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r8 /* next word in s1 */ +#define rWORD4 r9 /* next word in s2 */ +#define rWORD5 r10 /* next word in s1 */ +#define rWORD6 r11 /* next word in s2 */ + +#define rOFF8 r20 /* 8 bytes offset. */ +#define rOFF16 r21 /* 16 bytes offset. */ +#define rOFF24 r22 /* 24 bytes offset. */ +#define rOFF32 r23 /* 24 bytes offset. */ +#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */ +#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ +#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ +#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ +#define rSHR r28 /* Unaligned shift right count. */ +#define rSHL r29 /* Unaligned shift left count. */ +#define rWORD7 r30 /* next word in s1 */ +#define rWORD8 r31 /* next word in s2 */ + +#define rWORD8SAVE (-8) +#define rWORD7SAVE (-16) +#define rOFF8SAVE (-24) +#define rOFF16SAVE (-32) +#define rOFF24SAVE (-40) +#define rOFF32SAVE (-48) +#define rSHRSAVE (-56) +#define rSHLSAVE (-64) +#define rWORD8SHIFTSAVE (-72) +#define rWORD2SHIFTSAVE (-80) +#define rWORD4SHIFTSAVE (-88) +#define rWORD6SHIFTSAVE (-96) + +#ifdef __LITTLE_ENDIAN__ +# define LD ldbrx +#else +# define LD ldx +#endif + + xor r0, rSTR2, rSTR1 + cmpldi cr6, rN, 0 + cmpldi cr1, rN, 12 + clrldi. r0, r0, 61 + clrldi r12, rSTR1, 61 + cmpldi cr5, r12, 0 + beq- cr6, L(zeroLength) + dcbt 0, rSTR1 + dcbt 0, rSTR2 +/* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) + std rWORD8, rWORD8SAVE(r1) + std rWORD7, rWORD7SAVE(r1) + std rOFF8, rOFF8SAVE(r1) + std rOFF16, rOFF16SAVE(r1) + std rOFF24, rOFF24SAVE(r1) + std rOFF32, rOFF32SAVE(r1) + cfi_offset(rWORD8, rWORD8SAVE) + cfi_offset(rWORD7, rWORD7SAVE) + cfi_offset(rOFF8, rOFF8SAVE) + cfi_offset(rOFF16, rOFF16SAVE) + cfi_offset(rOFF24, rOFF24SAVE) + cfi_offset(rOFF32, rOFF32SAVE) + + li rOFF8,8 + li rOFF16,16 + li rOFF24,24 + li rOFF32,32 + + bne L(unaligned) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then we are already double word + aligned and can perform the DW aligned loop. + + Otherwise we know the two strings have the same alignment (but not + yet DW). So we force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DW aligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ + .align 4 +L(samealignment): + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + beq cr5, L(DWaligned) + add rN, rN, r12 + sldi rWORD6, r12, 3 + srdi r0, rN, 5 /* Divide by 32 */ + andi. r12, rN, 24 /* Get the DW remainder */ + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dPs4) + mtctr r0 + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + +/* Remainder is 8 */ + .align 3 +L(dsP1): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 + b L(dP1e) +/* Remainder is 16 */ + .align 4 +L(dPs2): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. */ + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 + b L(dP2e) +/* Remainder is 24 */ + .align 4 +L(dPs3): + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD2, rWORD6 + cmpld cr1, rWORD3, rWORD4 + b L(dP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dPs4): + mtctr r0 + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD2, rWORD6 + cmpld cr7, rWORD1, rWORD2 + b L(dP4e) + +/* At this point we know both strings are double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWaligned): + andi. r12, rN, 24 /* Get the DW remainder */ + srdi r0, rN, 5 /* Divide by 32 */ + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) + +/* Remainder is 8 */ + .align 4 +L(dP1): + mtctr r0 +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ + LD rWORD5, 0, rSTR1 + LD rWORD6, 0, rSTR2 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 +L(dP1e): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr1, rWORD3, rWORD4 + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5x) + bne cr7, L(dLcr7x) + + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + bne cr1, L(dLcr1) + cmpld cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + .align 3 +L(dP1x): + sldi. r12, rN, 3 + bne cr5, L(dLcr5x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Remainder is 16 */ + .align 4 +L(dP2): + mtctr r0 + LD rWORD5, 0, rSTR1 + LD rWORD6, 0, rSTR2 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 +L(dP2e): + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + LD rWORD3, rOFF24, rSTR1 + LD rWORD4, rOFF24, rSTR2 + cmpld cr1, rWORD3, rWORD4 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) + .align 4 +L(dP2x): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + sldi. r12, rN, 3 + bne cr6, L(dLcr6x) + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr1, L(dLcr1x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Remainder is 24 */ + .align 4 +L(dP3): + mtctr r0 + LD rWORD3, 0, rSTR1 + LD rWORD4, 0, rSTR2 + cmpld cr1, rWORD3, rWORD4 +L(dP3e): + LD rWORD5, rOFF8, rSTR1 + LD rWORD6, rOFF8, rSTR2 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) + LD rWORD7, rOFF16, rSTR1 + LD rWORD8, rOFF16, rSTR2 + cmpld cr5, rWORD7, rWORD8 + LD rWORD1, rOFF24, rSTR1 + LD rWORD2, rOFF24, rSTR2 + cmpld cr7, rWORD1, rWORD2 + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) +/* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP3x): + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + sldi. r12, rN, 3 + bne cr1, L(dLcr1x) + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + bne cr6, L(dLcr6x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne cr7, L(dLcr7x) + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dP4): + mtctr r0 + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpld cr7, rWORD1, rWORD2 +L(dP4e): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + LD rWORD5, rOFF16, rSTR1 + LD rWORD6, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + LD rWORD7, rOFF24, rSTR1 + LD rWORD8, rOFF24, rSTR2 + addi rSTR1, rSTR1, 24 + addi rSTR2, rSTR2, 24 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(dLoop): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +L(dLoop1): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) +L(dLoop2): + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) +L(dLoop3): + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + bne cr1, L(dLcr1) + cmpld cr7, rWORD1, rWORD2 + bdnz L(dLoop) + +L(dL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + cmpld cr5, rWORD7, rWORD8 +L(d44): + bne cr7, L(dLcr7) +L(d34): + bne cr1, L(dLcr1) +L(d24): + bne cr6, L(dLcr6) +L(d14): + sldi. r12, rN, 3 + bne cr5, L(dLcr5) +L(d04): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + beq L(duzeroLength) +/* At this point we have a remainder of 1 to 7 bytes to compare. Since + we are aligned it is safe to load the whole double word, and use + shift right double to eliminate bits beyond the compare length. */ +L(d00): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + cmpld cr7, rWORD1, rWORD2 + bne cr7, L(dLcr7x) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + + .align 4 +L(dLcr7): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr7x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(dLcr1): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr1x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr + .align 4 +L(dLcr6): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr6x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + .align 4 +L(dLcr5): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr5x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr5 + li rRTN, -1 + blr + + .align 4 +L(bytealigned): + mtctr rN + +/* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) is 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. Effectively + the first iteration of the loop only serves to load operands and + branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. */ + + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz L(b11) + cmpld cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz L(b12) + cmpld cr1, rWORD3, rWORD4 + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz L(b13) + .align 4 +L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + bne cr7, L(bLcr7) + + cmpld cr6, rWORD5, rWORD6 + bdz L(b3i) + + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + bne cr1, L(bLcr1) + + cmpld cr7, rWORD1, rWORD2 + bdz L(b2i) + + lbzu rWORD5, 1(rSTR1) + lbzu rWORD6, 1(rSTR2) + bne cr6, L(bLcr6) + + cmpld cr1, rWORD3, rWORD4 + bdnz L(bLoop) + +/* We speculatively loading bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. In this + case the loop will exit early (before the all pending bytes are + tested. In this case we must complete the pending operations + before returning. */ +L(b1i): + bne cr7, L(bLcr7) + bne cr1, L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne cr6, L(bLcr6) + bne cr7, L(bLcr7) + b L(bx34) + .align 4 +L(b3i): + bne cr1, L(bLcr1) + bne cr6, L(bLcr6) + b L(bx12) + .align 4 +L(bLcr7): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr +L(bLcr1): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr +L(bLcr6): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + +L(b13): + bne cr7, L(bx12) + bne cr1, L(bx34) +L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop +L(b12): + bne cr7, L(bx12) +L(bx34): + sub rRTN, rWORD3, rWORD4 + blr +L(b11): +L(bx12): + sub rRTN, rWORD1, rWORD2 + blr + + .align 4 +L(zeroLength): + li rRTN, 0 + blr + + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then rStr1 is double word + aligned and can perform the DWunaligned loop. + + Otherwise we know that rSTR1 is not already DW aligned yet. + So we can force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DWaligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected resister pair. */ +L(unaligned): + std rSHL, rSHLSAVE(r1) + cfi_offset(rSHL, rSHLSAVE) + clrldi rSHL, rSTR2, 61 + beq cr6, L(duzeroLength) + std rSHR, rSHRSAVE(r1) + cfi_offset(rSHR, rSHRSAVE) + beq cr5, L(DWunaligned) + std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 DW. */ + sub rWORD8_SHIFT, rSTR2, r12 +/* But do not attempt to address the DW before that DW that contains + the actual start of rSTR2. */ + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (DW aligned) start of rSTR1. */ + clrldi rSHL, rWORD8_SHIFT, 61 + clrrdi rSTR1, rSTR1, 3 + std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + sldi rSHL, rSHL, 3 + cmpld cr5, rWORD8_SHIFT, rSTR2 + add rN, rN, r12 + sldi rWORD6, r12, 3 + std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE) + cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE) + cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE) + subfic rSHR, rSHL, 64 + srdi r0, rN, 5 /* Divide by 32 */ + andi. r12, rN, 24 /* Get the DW remainder */ +/* We normally need to load 2 DWs to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a DW where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. */ + li rWORD8, 0 + blt cr5, L(dus0) + LD rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 8 + sld rWORD8, rWORD8, rSHL + +L(dus0): + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + srd r12, rWORD2, rSHR + clrldi rN, rN, 61 + beq L(duPs4) + mtctr r0 + or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + +/* Remainder is 8 */ + .align 4 +L(dusP1): + sld rWORD8_SHIFT, rWORD2, rSHL + sld rWORD7, rWORD1, rWORD6 + sld rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duPs2): + sld rWORD6_SHIFT, rWORD2, rSHL + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD8, rWORD6 + b L(duP2e) +/* Remainder is 24 */ + .align 4 +L(duPs3): + sld rWORD4_SHIFT, rWORD2, rSHL + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD8, rWORD6 + b L(duP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duPs4): + mtctr r0 + or rWORD8, r12, rWORD8 + sld rWORD2_SHIFT, rWORD2, rSHL + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD8, rWORD6 + b L(duP4e) + +/* At this point we know rSTR1 is double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWunaligned): + std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + srdi r0, rN, 5 /* Divide by 32 */ + std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + andi. r12, rN, 24 /* Get the DW remainder */ + std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE) + cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE) + cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE) + cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE) + sldi rSHL, rSHL, 3 + LD rWORD6, 0, rSTR2 + LD rWORD8, rOFF8, rSTR2 + addi rSTR2, rSTR2, 8 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + subfic rSHR, rSHL, 64 + sld rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) + mtctr r0 + bgt cr1, L(duP3) + beq cr1, L(duP2) + +/* Remainder is 8 */ + .align 4 +L(duP1): + srd r12, rWORD8, rSHR + LD rWORD7, 0, rSTR1 + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) +L(duP1e): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) + or rWORD4, r12, rWORD2_SHIFT + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + bne cr7, L(duLcr7) + or rWORD6, r0, rWORD4_SHIFT + cmpld cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duP2): + srd r0, rWORD8, rSHR + LD rWORD5, 0, rSTR1 + or rWORD6, r0, rWORD6_SHIFT + sld rWORD6_SHIFT, rWORD8, rSHL +L(duP2e): + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr6, rWORD5, rWORD6 + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + LD rWORD3, rOFF24, rSTR1 + LD rWORD4, rOFF24, rSTR2 + cmpld cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + cmpld cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmpld cr5, rWORD7, rWORD8 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Remainder is 24 */ + .align 4 +L(duP3): + srd r12, rWORD8, rSHR + LD rWORD3, 0, rSTR1 + sld rWORD4_SHIFT, rWORD8, rSHL + or rWORD4, r12, rWORD6_SHIFT +L(duP3e): + LD rWORD5, rOFF8, rSTR1 + LD rWORD6, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT + LD rWORD7, rOFF16, rSTR1 + LD rWORD8, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) + LD rWORD1, rOFF24, rSTR1 + LD rWORD2, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + cmpld cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duP4): + mtctr r0 + srd r0, rWORD8, rSHR + LD rWORD1, 0, rSTR1 + sld rWORD2_SHIFT, rWORD8, rSHL + or rWORD2, r0, rWORD6_SHIFT +L(duP4e): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT + LD rWORD5, rOFF16, rSTR1 + LD rWORD6, rOFF16, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT + LD rWORD7, rOFF24, rSTR1 + LD rWORD8, rOFF24, rSTR2 + addi rSTR1, rSTR1, 24 + addi rSTR2, rSTR2, 24 + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + cmpld cr5, rWORD7, rWORD8 + bdz L(du24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(duLoop): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +L(duLoop1): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +L(duLoop2): + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +L(duLoop3): + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + cmpld cr7, rWORD1, rWORD2 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + bdnz L(duLoop) + +L(duL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmpld cr5, rWORD7, rWORD8 +L(du44): + bne cr7, L(duLcr7) +L(du34): + bne cr1, L(duLcr1) +L(du24): + bne cr6, L(duLcr6) +L(du14): + sldi. rN, rN, 3 + bne cr5, L(duLcr5) +/* At this point we have a remainder of 1 to 7 bytes to compare. We use + shift right double to eliminate bits beyond the compare length. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rWORD8_SHIFT). */ + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + .align 4 +L(dutrim): + LD rWORD1, rOFF8, rSTR1 + ld rWORD8, -8(r1) + subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */ + or rWORD2, r0, rWORD8_SHIFT + ld rWORD7, rWORD7SAVE(r1) + ld rSHL, rSHLSAVE(r1) + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + ld rSHR, rSHRSAVE(r1) + ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + li rRTN, 0 + cmpld cr7, rWORD1, rWORD2 + ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + beq cr7, L(dureturn24) + li rRTN, 1 + ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(duLcr7): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr7, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr1): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr6): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr5): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + + .align 3 +L(duZeroReturn): + li rRTN, 0 + .align 4 +L(dureturn): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dureturn29): + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) +L(dureturn27): + ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) +L(dureturn24): + ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + blr + +L(duzeroLength): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +END (MEMCMP) +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp, bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcpy.S new file mode 100644 index 0000000000..e08993cbc3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcpy.S @@ -0,0 +1,430 @@ +/* Optimized memcpy implementation for PowerPC64/POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. */ + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +#define dst 11 /* Use r11 so r3 kept unchanged. */ +#define src 4 +#define cnt 5 + + .machine power7 +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + cmpldi cr1,cnt,31 + neg 0,3 + ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + +/* Align copies using VSX instructions to quadword. It is to avoid alignment + traps when memcpy is used on non-cacheable memory (for instance, memory + mapped I/O). */ + andi. 10,3,15 + clrldi 11,4,60 + cmpld cr6,10,11 /* SRC and DST alignments match? */ + + mr dst,3 + bne cr6,L(copy_GE_32_unaligned) + beq L(aligned_copy) + + mtocrf 0x01,0 + clrldi 0,0,60 + +/* Get the DST and SRC aligned to 16 bytes. */ +1: + bf 31,2f + lbz 6,0(src) + addi src,src,1 + stb 6,0(dst) + addi dst,dst,1 +2: + bf 30,4f + lhz 6,0(src) + addi src,src,2 + sth 6,0(dst) + addi dst,dst,2 +4: + bf 29,8f + lwz 6,0(src) + addi src,src,4 + stw 6,0(dst) + addi dst,dst,4 +8: + bf 28,16f + ld 6,0(src) + addi src,src,8 + std 6,0(dst) + addi dst,dst,8 +16: + subf cnt,0,cnt + +/* Main aligned copy loop. Copies 128 bytes at a time. */ +L(aligned_copy): + li 6,16 + li 7,32 + li 8,48 + mtocrf 0x02,cnt + srdi 12,cnt,7 + cmpdi 12,0 + beq L(aligned_tail) + lxvd2x 6,0,src + lxvd2x 7,src,6 + mtctr 12 + b L(aligned_128loop) + + .align 4 +L(aligned_128head): + /* for the 2nd + iteration of this loop. */ + lxvd2x 6,0,src + lxvd2x 7,src,6 +L(aligned_128loop): + lxvd2x 8,src,7 + lxvd2x 9,src,8 + stxvd2x 6,0,dst + addi src,src,64 + stxvd2x 7,dst,6 + stxvd2x 8,dst,7 + stxvd2x 9,dst,8 + lxvd2x 6,0,src + lxvd2x 7,src,6 + addi dst,dst,64 + lxvd2x 8,src,7 + lxvd2x 9,src,8 + addi src,src,64 + stxvd2x 6,0,dst + stxvd2x 7,dst,6 + stxvd2x 8,dst,7 + stxvd2x 9,dst,8 + addi dst,dst,64 + bdnz L(aligned_128head) + +L(aligned_tail): + mtocrf 0x01,cnt + bf 25,32f + lxvd2x 6,0,src + lxvd2x 7,src,6 + lxvd2x 8,src,7 + lxvd2x 9,src,8 + addi src,src,64 + stxvd2x 6,0,dst + stxvd2x 7,dst,6 + stxvd2x 8,dst,7 + stxvd2x 9,dst,8 + addi dst,dst,64 +32: + bf 26,16f + lxvd2x 6,0,src + lxvd2x 7,src,6 + addi src,src,32 + stxvd2x 6,0,dst + stxvd2x 7,dst,6 + addi dst,dst,32 +16: + bf 27,8f + lxvd2x 6,0,src + addi src,src,16 + stxvd2x 6,0,dst + addi dst,dst,16 +8: + bf 28,4f + ld 6,0(src) + addi src,src,8 + std 6,0(dst) + addi dst,dst,8 +4: /* Copies 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(src) + stw 6,0(dst) + bf 30,L(tail5) + lhz 7,4(src) + sth 7,4(dst) + bflr 31 + lbz 8,6(src) + stb 8,6(dst) + /* Return original DST pointer. */ + blr + + +/* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + mr dst,3 + cmpldi cr6,cnt,8 + mtocrf 0x01,cnt + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + andi. 0,8,3 + cmpldi cr1,cnt,16 + beq L(copy_LT_32_aligned) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,0 + subf cnt,0,cnt +2: + bf 30,1f + lhz 6,0(src) + addi src,src,2 + sth 6,0(dst) + addi dst,dst,2 +1: + bf 31,L(end_4bytes_alignment) + lbz 6,0(src) + addi src,src,1 + stb 6,0(dst) + addi dst,dst,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,cnt,16 + mtocrf 0x01,cnt + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(src) + lwz 7,4(src) + stw 6,0(dst) + lwz 8,8(src) + stw 7,4(dst) + lwz 6,12(src) + addi src,src,16 + stw 8,8(dst) + stw 6,12(dst) + addi dst,dst,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4) + lwz 6,0(src) + lwz 7,4(src) + addi src,src,8 + stw 6,0(dst) + stw 7,4(dst) + addi dst,dst,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + lwz 6,0(src) + stw 6,0(dst) + bf 30,L(tail5) + lhz 7,4(src) + sth 7,4(dst) + bflr 31 + lbz 8,6(src) + stb 8,6(dst) + /* Return original DST pointer. */ + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + lhz 6,0(src) + sth 6,0(dst) + bflr 31 + lbz 7,2(src) + stb 7,2(dst) + blr + + .align 4 +L(tail5): + bflr 31 + lbz 6,4(src) + stb 6,4(dst) + blr + + .align 4 +1: + bflr 31 + lbz 6,0(src) + stb 6,0(dst) + /* Return original DST pointer. */ + blr + + +/* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,L(tail4) + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + + lwz 6,0(src) + lwz 7,4(src) + stw 6,0(dst) + stw 7,4(dst) + blr + + +/* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + clrldi 0,0,60 /* Number of bytes until the 1st dst quadword. */ + srdi 9,cnt,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* DST is not quadword aligned, get it aligned. */ + + mtocrf 0x01,0 + subf cnt,0,cnt + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: + bf 31,2f + lbz 6,0(src) + addi src,src,1 + stb 6,0(dst) + addi dst,dst,1 +2: + bf 30,4f + lhz 6,0(src) + addi src,src,2 + sth 6,0(dst) + addi dst,dst,2 +4: + bf 29,8f + lwz 6,0(src) + addi src,src,4 + stw 6,0(dst) + addi dst,dst,4 +8: + bf 28,0f + ld 6,0(src) + addi src,src,8 + std 6,0(dst) + addi dst,dst,8 +0: + srdi 9,cnt,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi 10,cnt,60 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmpldi cr1,10,0 + srdi 8,cnt,5 /* Setup the loop counter. */ + mtocrf 0x01,9 + cmpldi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,src +#else + lvsl 5,0,src +#endif + lvx 3,0,src + li 0,0 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop. */ + lvx 4,src,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi src,src,16 + stvx 6,0,dst + addi dst,dst,16 + vor 3,4,4 + clrrdi 0,src,60 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,src,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,src,7 +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi src,src,32 + stvx 6,0,dst + stvx 10,dst,6 + addi dst,dst,32 + bdnz L(unaligned_loop) + + clrrdi 0,src,60 + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + mtocrf 0x01,cnt + beqlr cr1 + + add src,src,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ + /* Copy 8 bytes. */ + bf 28,4f + lwz 6,0(src) + lwz 7,4(src) + addi src,src,8 + stw 6,0(dst) + stw 7,4(dst) + addi dst,dst,8 +4: /* Copy 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(src) + stw 6,0(dst) + bf 30,L(tail5) + lhz 7,4(src) + sth 7,4(dst) + bflr 31 + lbz 8,6(src) + stb 8,6(dst) + /* Return original DST pointer. */ + blr + +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memmove.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memmove.S new file mode 100644 index 0000000000..4c0f7c3571 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memmove.S @@ -0,0 +1,835 @@ +/* Optimized memmove implementation for PowerPC64/POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + +/* void* [r3] memmove (void *dest [r3], const void *src [r4], size_t len [r5]) + + This optimization check if memory 'dest' overlaps with 'src'. If it does + not then it calls an optimized memcpy call (similar to memcpy for POWER7, + embedded here to gain some cycles). + If source and destiny overlaps, a optimized backwards memcpy is used + instead. */ + +#ifndef MEMMOVE +# define MEMMOVE memmove +#endif + .machine power7 +EALIGN (MEMMOVE, 5, 0) + CALL_MCOUNT 3 + +L(_memmove): + subf r9,r4,r3 + cmpld cr7,r9,r5 + blt cr7,L(memmove_bwd) + + cmpldi cr1,r5,31 + neg 0,3 + ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + + andi. 10,3,15 + clrldi 11,4,60 + cmpld cr6,10,11 /* SRC and DST alignments match? */ + + mr r11,3 + bne cr6,L(copy_GE_32_unaligned) + beq L(aligned_copy) + + mtocrf 0x01,0 + clrldi 0,0,60 + +/* Get the DST and SRC aligned to 8 bytes (16 for little-endian). */ +1: + bf 31,2f + lbz 6,0(r4) + addi r4,r4,1 + stb 6,0(r11) + addi r11,r11,1 +2: + bf 30,4f + lhz 6,0(r4) + addi r4,r4,2 + sth 6,0(r11) + addi r11,r11,2 +4: + bf 29,8f + lwz 6,0(r4) + addi r4,r4,4 + stw 6,0(r11) + addi r11,r11,4 +8: + bf 28,16f + ld 6,0(r4) + addi r4,r4,8 + std 6,0(r11) + addi r11,r11,8 +16: + subf r5,0,r5 + +/* Main aligned copy loop. Copies 128 bytes at a time. */ +L(aligned_copy): + li 6,16 + li 7,32 + li 8,48 + mtocrf 0x02,r5 + srdi 12,r5,7 + cmpdi 12,0 + beq L(aligned_tail) + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + mtctr 12 + b L(aligned_128loop) + + .align 4 +L(aligned_128head): + /* for the 2nd + iteration of this loop. */ + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 +L(aligned_128loop): + lxvd2x 8,r4,7 + lxvd2x 9,r4,8 + stxvd2x 6,0,r11 + addi r4,r4,64 + stxvd2x 7,r11,6 + stxvd2x 8,r11,7 + stxvd2x 9,r11,8 + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + addi r11,r11,64 + lxvd2x 8,r4,7 + lxvd2x 9,r4,8 + addi r4,r4,64 + stxvd2x 6,0,r11 + stxvd2x 7,r11,6 + stxvd2x 8,r11,7 + stxvd2x 9,r11,8 + addi r11,r11,64 + bdnz L(aligned_128head) + +L(aligned_tail): + mtocrf 0x01,r5 + bf 25,32f + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + lxvd2x 8,r4,7 + lxvd2x 9,r4,8 + addi r4,r4,64 + stxvd2x 6,0,r11 + stxvd2x 7,r11,6 + stxvd2x 8,r11,7 + stxvd2x 9,r11,8 + addi r11,r11,64 +32: + bf 26,16f + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + addi r4,r4,32 + stxvd2x 6,0,r11 + stxvd2x 7,r11,6 + addi r11,r11,32 +16: + bf 27,8f + lxvd2x 6,0,r4 + addi r4,r4,16 + stxvd2x 6,0,r11 + addi r11,r11,16 +8: + bf 28,4f + ld 6,0(r4) + addi r4,r4,8 + std 6,0(r11) + addi r11,r11,8 +4: /* Copies 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(r4) + stw 6,0(r11) + bf 30,L(tail5) + lhz 7,4(r4) + sth 7,4(r11) + bflr 31 + lbz 8,6(r4) + stb 8,6(r11) + /* Return original DST pointer. */ + blr + +/* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + mr r11,3 + cmpldi cr6,r5,8 + mtocrf 0x01,r5 + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + andi. 0,8,3 + cmpldi cr1,r5,16 + beq L(copy_LT_32_aligned) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,0 + subf r5,0,r5 +2: + bf 30,1f + lhz 6,0(r4) + addi r4,r4,2 + sth 6,0(r11) + addi r11,r11,2 +1: + bf 31,L(end_4bytes_alignment) + lbz 6,0(r4) + addi r4,r4,1 + stb 6,0(r11) + addi r11,r11,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,r5,16 + mtocrf 0x01,r5 + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(r4) + lwz 7,4(r4) + stw 6,0(r11) + lwz 8,8(r4) + stw 7,4(r11) + lwz 6,12(r4) + addi r4,r4,16 + stw 8,8(r11) + stw 6,12(r11) + addi r11,r11,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4) + lwz 6,0(r4) + lwz 7,4(r4) + addi r4,r4,8 + stw 6,0(r11) + stw 7,4(r11) + addi r11,r11,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + lwz 6,0(r4) + stw 6,0(r11) + bf 30,L(tail5) + lhz 7,4(r4) + sth 7,4(r11) + bflr 31 + lbz 8,6(r4) + stb 8,6(r11) + /* Return original DST pointer. */ + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + lhz 6,0(r4) + sth 6,0(r11) + bflr 31 + lbz 7,2(r4) + stb 7,2(r11) + blr + + .align 4 +L(tail5): + bflr 31 + lbz 6,4(r4) + stb 6,4(r11) + blr + + .align 4 +1: + bflr 31 + lbz 6,0(r4) + stb 6,0(r11) + /* Return original DST pointer. */ + blr + +/* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,L(tail4) + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + + lwz 6,0(r4) + lwz 7,4(r4) + stw 6,0(r11) + stw 7,4(r11) + blr + + +/* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + clrldi 0,0,60 /* Number of bytes until the 1st r11 quadword. */ + srdi 9,r5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* DST is not quadword aligned, get it aligned. */ + + mtocrf 0x01,0 + subf r5,0,r5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: + bf 31,2f + lbz 6,0(r4) + addi r4,r4,1 + stb 6,0(r11) + addi r11,r11,1 +2: + bf 30,4f + lhz 6,0(r4) + addi r4,r4,2 + sth 6,0(r11) + addi r11,r11,2 +4: + bf 29,8f + lwz 6,0(r4) + addi r4,r4,4 + stw 6,0(r11) + addi r11,r11,4 +8: + bf 28,0f + ld 6,0(r4) + addi r4,r4,8 + std 6,0(r11) + addi r11,r11,8 +0: + srdi 9,r5,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi 10,r5,60 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmpldi cr1,10,0 + srdi 8,r5,5 /* Setup the loop counter. */ + mtocrf 0x01,9 + cmpldi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,r4 +#else + lvsl 5,0,r4 +#endif + lvx 3,0,r4 + li 0,0 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop. */ + lvx 4,r4,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi r4,r4,16 + stvx 6,0,r11 + addi r11,r11,16 + vor 3,4,4 + clrrdi 0,r4,60 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,r4,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,r4,7 +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi r4,r4,32 + stvx 6,0,r11 + stvx 10,r11,6 + addi r11,r11,32 + bdnz L(unaligned_loop) + + clrrdi 0,r4,60 + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + mtocrf 0x01,r5 + beqlr cr1 + + add r4,r4,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ + /* Copy 8 bytes. */ + bf 28,4f + lwz 6,0(r4) + lwz 7,4(r4) + addi r4,r4,8 + stw 6,0(r11) + stw 7,4(r11) + addi r11,r11,8 +4: /* Copy 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(r4) + stw 6,0(r11) + bf 30,L(tail5) + lhz 7,4(r4) + sth 7,4(r11) + bflr 31 + lbz 8,6(r4) + stb 8,6(r11) + /* Return original DST pointer. */ + blr + + /* Start to memcpy backward implementation: the algorith first check if + src and dest have the same alignment and if it does align both to 16 + bytes and copy using VSX instructions. + If does not, align dest to 16 bytes and use VMX (altivec) instruction + to read two 16 bytes at time, shift/permute the bytes read and write + aligned to dest. */ +L(memmove_bwd): + cmpldi cr1,r5,31 + /* Copy is done backwards: update the pointers and check alignment. */ + add r11,r3,r5 + add r4,r4,r5 + mr r0,r11 + ble cr1, L(copy_LT_32_bwd) /* If move < 32 bytes use short move + code. */ + + andi. r10,r11,15 /* Check if r11 is aligned to 16 bytes */ + clrldi r9,r4,60 /* Check if r4 is aligned to 16 bytes */ + cmpld cr6,r10,r9 /* SRC and DST alignments match? */ + + bne cr6,L(copy_GE_32_unaligned_bwd) + beq L(aligned_copy_bwd) + + mtocrf 0x01,r0 + clrldi r0,r0,60 + +/* Get the DST and SRC aligned to 16 bytes. */ +1: + bf 31,2f + lbz r6,-1(r4) + subi r4,r4,1 + stb r6,-1(r11) + subi r11,r11,1 +2: + bf 30,4f + lhz r6,-2(r4) + subi r4,r4,2 + sth r6,-2(r11) + subi r11,r11,2 +4: + bf 29,8f + lwz r6,-4(r4) + subi r4,r4,4 + stw r6,-4(r11) + subi r11,r11,4 +8: + bf 28,16f + ld r6,-8(r4) + subi r4,r4,8 + std r6,-8(r11) + subi r11,r11,8 +16: + subf r5,0,r5 + +/* Main aligned copy loop. Copies 128 bytes at a time. */ +L(aligned_copy_bwd): + li r6,-16 + li r7,-32 + li r8,-48 + li r9,-64 + mtocrf 0x02,r5 + srdi r12,r5,7 + cmpdi r12,0 + beq L(aligned_tail_bwd) + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 + mtctr 12 + b L(aligned_128loop_bwd) + + .align 4 +L(aligned_128head_bwd): + /* for the 2nd + iteration of this loop. */ + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 +L(aligned_128loop_bwd): + lxvd2x v8,r4,r8 + lxvd2x v9,r4,r9 + stxvd2x v6,r11,r6 + subi r4,r4,64 + stxvd2x v7,r11,r7 + stxvd2x v8,r11,r8 + stxvd2x v9,r11,r9 + lxvd2x v6,r4,r6 + lxvd2x v7,r4,7 + subi r11,r11,64 + lxvd2x v8,r4,r8 + lxvd2x v9,r4,r9 + subi r4,r4,64 + stxvd2x v6,r11,r6 + stxvd2x v7,r11,r7 + stxvd2x v8,r11,r8 + stxvd2x v9,r11,r9 + subi r11,r11,64 + bdnz L(aligned_128head_bwd) + +L(aligned_tail_bwd): + mtocrf 0x01,r5 + bf 25,32f + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 + lxvd2x v8,r4,r8 + lxvd2x v9,r4,r9 + subi r4,r4,64 + stxvd2x v6,r11,r6 + stxvd2x v7,r11,r7 + stxvd2x v8,r11,r8 + stxvd2x v9,r11,r9 + subi r11,r11,64 +32: + bf 26,16f + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 + subi r4,r4,32 + stxvd2x v6,r11,r6 + stxvd2x v7,r11,r7 + subi r11,r11,32 +16: + bf 27,8f + lxvd2x v6,r4,r6 + subi r4,r4,16 + stxvd2x v6,r11,r6 + subi r11,r11,16 +8: + bf 28,4f + ld r6,-8(r4) + subi r4,r4,8 + std r6,-8(r11) + subi r11,r11,8 +4: /* Copies 4~7 bytes. */ + bf 29,L(tail2_bwd) + lwz r6,-4(r4) + stw r6,-4(r11) + bf 30,L(tail5_bwd) + lhz r7,-6(r4) + sth r7,-6(r11) + bflr 31 + lbz r8,-7(r4) + stb r8,-7(r11) + /* Return original DST pointer. */ + blr + +/* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32_bwd): + cmpldi cr6,r5,8 + mtocrf 0x01,r5 + ble cr6,L(copy_LE_8_bwd) + + /* At least 9 bytes to go. */ + neg r8,r4 + andi. r0,r8,3 + cmpldi cr1,r5,16 + beq L(copy_LT_32_aligned_bwd) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,0 + subf r5,0,r5 +2: + bf 30,1f + lhz r6,-2(r4) + subi r4,r4,2 + sth r6,-2(r11) + subi r11,r11,2 +1: + bf 31,L(end_4bytes_alignment_bwd) + lbz 6,-1(r4) + subi r4,r4,1 + stb 6,-1(r11) + subi r11,r11,1 + + .align 4 +L(end_4bytes_alignment_bwd): + cmpldi cr1,r5,16 + mtocrf 0x01,r5 + +L(copy_LT_32_aligned_bwd): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz r6,-4(r4) + lwz r7,-8(r4) + stw r6,-4(r11) + lwz r8,-12(r4) + stw r7,-8(r11) + lwz r6,-16(r4) + subi r4,r4,16 + stw r8,-12(r11) + stw r6,-16(r11) + subi r11,r11,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4_bwd) + lwz r6,-4(r4) + lwz r7,-8(r4) + subi r4,r4,8 + stw r6,-4(r11) + stw r7,-8(r11) + subi r11,r11,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4_bwd): + bf 29,L(tail2_bwd) + lwz 6,-4(r4) + stw 6,-4(r11) + bf 30,L(tail5_bwd) + lhz 7,-6(r4) + sth 7,-6(r11) + bflr 31 + lbz 8,-7(r4) + stb 8,-7(r11) + /* Return original DST pointer. */ + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2_bwd): + bf 30,1f + lhz 6,-2(r4) + sth 6,-2(r11) + bflr 31 + lbz 7,-3(r4) + stb 7,-3(r11) + blr + + .align 4 +L(tail5_bwd): + bflr 31 + lbz 6,-5(r4) + stb 6,-5(r11) + blr + + .align 4 +1: + bflr 31 + lbz 6,-1(r4) + stb 6,-1(r11) + /* Return original DST pointer. */ + blr + + +/* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8_bwd): + bne cr6,L(tail4_bwd) + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + lwz 6,-8(r4) + lwz 7,-4(r4) + stw 6,-8(r11) + stw 7,-4(r11) + blr + + +/* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned_bwd): + andi. r10,r11,15 /* Check alignment of DST against 16 bytes.. */ + srdi r9,r5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont_bwd) + + /* DST is not quadword aligned and r10 holds the address masked to + compare alignments. */ + mtocrf 0x01,r10 + subf r5,r10,r5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: + bf 31,2f + lbz r6,-1(r4) + subi r4,r4,1 + stb r6,-1(r11) + subi r11,r11,1 +2: + bf 30,4f + lhz r6,-2(r4) + subi r4,r4,2 + sth r6,-2(r11) + subi r11,r11,2 +4: + bf 29,8f + lwz r6,-4(r4) + subi r4,r4,4 + stw r6,-4(r11) + subi r11,r11,4 +8: + bf 28,0f + ld r6,-8(r4) + subi r4,r4,8 + std r6,-8(r11) + subi r11,r11,8 +0: + srdi r9,r5,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont_bwd): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi r10,r5,60 + li r6,-16 /* Index for 16-bytes offsets. */ + li r7,-32 /* Index for 32-bytes offsets. */ + cmpldi cr1,10,0 + srdi r8,r5,5 /* Setup the loop counter. */ + mtocrf 0x01,9 + cmpldi cr6,r9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr v5,r0,r4 +#else + lvsl v5,r0,r4 +#endif + lvx v3,0,r4 + li r0,0 + bf 31,L(setup_unaligned_loop_bwd) + + /* Copy another 16 bytes to align to 32-bytes due to the loop. */ + lvx v4,r4,r6 +#ifdef __LITTLE_ENDIAN__ + vperm v6,v3,v4,v5 +#else + vperm v6,v4,v3,v5 +#endif + subi r4,r4,16 + stvx v6,r11,r6 + subi r11,r11,16 + vor v3,v4,v4 + clrrdi r0,r4,60 + +L(setup_unaligned_loop_bwd): + mtctr r8 + ble cr6,L(end_unaligned_loop_bwd) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop_bwd): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx v4,r4,r6 +#ifdef __LITTLE_ENDIAN__ + vperm v6,v3,v4,v5 +#else + vperm v6,v4,v3,v5 +#endif + lvx v3,r4,r7 +#ifdef __LITTLE_ENDIAN__ + vperm v10,v4,v3,v5 +#else + vperm v10,v3,v4,v5 +#endif + subi r4,r4,32 + stvx v6,r11,r6 + stvx v10,r11,r7 + subi r11,r11,32 + bdnz L(unaligned_loop_bwd) + + clrrdi r0,r4,60 + + .align 4 +L(end_unaligned_loop_bwd): + + /* Check for tail bytes. */ + mtocrf 0x01,r5 + beqlr cr1 + + add r4,r4,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ + /* Copy 8 bytes. */ + bf 28,4f + lwz r6,-4(r4) + lwz r7,-8(r4) + subi r4,r4,8 + stw r6,-4(r11) + stw r7,-8(r11) + subi r11,r11,8 +4: /* Copy 4~7 bytes. */ + bf 29,L(tail2_bwd) + lwz r6,-4(r4) + stw r6,-4(r11) + bf 30,L(tail5_bwd) + lhz r7,-6(r4) + sth r7,-6(r11) + bflr 31 + lbz r8,-7(r4) + stb r8,-7(r11) + /* Return original DST pointer. */ + blr +END_GEN_TB (MEMMOVE, TB_TOCLESS) +libc_hidden_builtin_def (memmove) + + +/* void bcopy(const void *src [r3], void *dest [r4], size_t n [r5]) + Implemented in this file to avoid linker create a stub function call + in the branch to '_memmove'. */ +ENTRY (__bcopy) + mr r6,r3 + mr r3,r4 + mr r4,r6 + b L(_memmove) +END (__bcopy) +weak_alias (__bcopy, bcopy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/mempcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/mempcpy.S new file mode 100644 index 0000000000..4e15d1e40c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/mempcpy.S @@ -0,0 +1,472 @@ +/* Optimized mempcpy implementation for POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + +/* __ptr_t [r3] __mempcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst' + 'len'. */ + +#ifndef MEMPCPY +# define MEMPCPY __mempcpy +#endif + .machine power7 +EALIGN (MEMPCPY, 5, 0) + CALL_MCOUNT 3 + + cmpldi cr1,5,31 + neg 0,3 + std 3,-16(1) + std 31,-8(1) + cfi_offset(31,-8) + ble cr1,L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + + andi. 11,3,7 /* Check alignment of DST. */ + + + clrldi 10,4,61 /* Check alignment of SRC. */ + cmpld cr6,10,11 /* SRC and DST alignments match? */ + mr 12,4 + mr 31,5 + bne cr6,L(copy_GE_32_unaligned) + + srdi 9,5,3 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_aligned_cont) + + clrldi 0,0,61 + mtcrf 0x01,0 + subf 31,0,5 + + /* Get the SRC aligned to 8 bytes. */ + +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,4f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: bf 29,0f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +0: + clrldi 10,12,61 /* Check alignment of SRC again. */ + srdi 9,31,3 /* Number of full doublewords remaining. */ + +L(copy_GE_32_aligned_cont): + + clrldi 11,31,61 + mtcrf 0x01,9 + + srdi 8,31,5 + cmpldi cr1,9,4 + cmpldi cr6,11,0 + mr 11,12 + + /* Copy 1~3 doublewords so the main loop starts + at a multiple of 32 bytes. */ + + bf 30,1f + ld 6,0(12) + ld 7,8(12) + addi 11,12,16 + mtctr 8 + std 6,0(3) + std 7,8(3) + addi 10,3,16 + bf 31,4f + ld 0,16(12) + std 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 + b 4f + + .align 4 +1: /* Copy 1 doubleword and set the counter. */ + mr 10,3 + mtctr 8 + bf 31,4f + ld 6,0(12) + addi 11,12,8 + std 6,0(3) + addi 10,3,8 + + /* Main aligned copy loop. Copies 32-bytes at a time. */ + .align 4 +4: + ld 6,0(11) + ld 7,8(11) + ld 8,16(11) + ld 0,24(11) + addi 11,11,32 + + std 6,0(10) + std 7,8(10) + std 8,16(10) + std 0,24(10) + addi 10,10,32 + bdnz 4b +3: + + /* Check for tail bytes. */ + rldicr 0,31,0,60 + mtcrf 0x01,31 + beq cr6,0f + +.L9: + add 3,3,0 + add 12,12,0 + + /* At this point we have a tail of 0-7 bytes and we know that the + destination is doubleword-aligned. */ +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + ld 31,-8(1) + ld 3,-16(1) + add 3,3,5 + blr + + /* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + cmpldi cr6,5,8 + mr 12,4 + mtcrf 0x01,5 + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + clrrdi 11,4,2 + andi. 0,8,3 + cmpldi cr1,5,16 + mr 10,5 + beq L(copy_LT_32_aligned) + + /* Force 4-bytes alignment for SRC. */ + mtocrf 0x01,0 + subf 10,0,5 +2: bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,L(end_4bytes_alignment) + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,10,16 + mtcrf 0x01,10 + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 8,8(12) + stw 7,4(3) + lwz 6,12(12) + addi 12,12,16 + stw 8,8(3) + stw 6,12(3) + addi 3,3,16 +8: /* Copy 8 bytes. */ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2-3 bytes. */ + bf 30,1f + + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + ld 3,-16(1) + add 3,3,5 + blr + + .align 4 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + ld 3,-16(1) + add 3,3,5 + blr + + /* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,4f + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + ld 3,-16(1) /* Return DST + LEN pointer. */ + add 3,3,5 + blr + + .align 4 +4: /* Copies 4~7 bytes. */ + bf 29,2b + + lwz 6,0(4) + stw 6,0(3) + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + ld 3,-16(1) + add 3,3,5 + blr + + .align 4 +5: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,4(4) + stb 6,4(3) + +0: /* Return DST + LEN pointer. */ + ld 3,-16(1) + add 3,3,5 + blr + + /* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + clrldi 0,0,60 /* Number of bytes until the 1st + quadword. */ + andi. 11,3,15 /* Check alignment of DST (against + quadwords). */ + srdi 9,5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* SRC is not quadword aligned, get it aligned. */ + + mtcrf 0x01,0 + subf 31,0,5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: /* Copy 1 byte. */ + bf 31,2f + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: /* Copy 2 bytes. */ + bf 30,4f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: /* Copy 4 bytes. */ + bf 29,8f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +8: /* Copy 8 bytes. */ + bf 28,0f + + ld 6,0(12) + addi 12,12,8 + std 6,0(3) + addi 3,3,8 +0: + clrldi 10,12,60 /* Check alignment of SRC. */ + srdi 9,31,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi 11,31,60 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmpldi cr1,11,0 + srdi 8,31,5 /* Setup the loop counter. */ + mr 10,3 + mr 11,12 + mtcrf 0x01,9 + cmpldi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,12 +#else + lvsl 5,0,12 +#endif + lvx 3,0,12 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop . */ + lvx 4,12,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi 11,12,16 + addi 10,3,16 + stvx 6,0,3 + vor 3,4,4 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,11,6 /* vr4 = r11+16. */ +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,11,7 /* vr3 = r11+32. */ +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi 11,11,32 + stvx 6,0,10 + stvx 10,10,6 + addi 10,10,32 + + bdnz L(unaligned_loop) + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + rldicr 0,31,0,59 + mtcrf 0x01,31 + beq cr1,0f + + add 3,3,0 + add 12,12,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ +8: /* Copy 8 bytes. */ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2~3 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + ld 31,-8(1) + ld 3,-16(1) + add 3,3,5 + blr + +END_GEN_TB (MEMPCPY,TB_TOCLESS) +libc_hidden_def (__mempcpy) +weak_alias (__mempcpy, mempcpy) +libc_hidden_builtin_def (mempcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memrchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memrchr.S new file mode 100644 index 0000000000..4276768915 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memrchr.S @@ -0,0 +1,201 @@ +/* Optimized memrchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5]) */ + +#ifndef MEMRCHR +# define MEMRCHR __memrchr +#endif + .machine power7 +ENTRY (MEMRCHR) + CALL_MCOUNT 3 + add r7,r3,r5 /* Calculate the last acceptable address. */ + neg r0,r7 + addi r7,r7,-1 + mr r10,r3 + clrrdi r6,r7,7 + li r9,3<<5 + dcbt r9,r6,8 /* Stream hint, decreasing addresses. */ + + /* Replicate BYTE to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + li r6,-8 + li r9,-1 + rlwinm r0,r0,3,26,28 /* Calculate padding. */ + clrrdi r8,r7,3 + srd r9,r9,r0 + cmpldi r5,32 + clrrdi r0,r10,3 + ble L(small_range) + +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 +#else + ldbrx r12,0,r8 /* Load reversed doubleword from memory. */ +#endif + cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ + and r3,r3,r9 + cmpldi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + bf 28,L(loop_setup) + + /* Handle DWORD2 of pair. */ +#ifdef __LITTLE_ENDIAN__ + ldx r12,r8,r6 +#else + ldbrx r12,r8,r6 +#endif + addi r8,r8,-8 + cmpb r3,r12,r4 + cmpldi cr7,r3,0 + bne cr7,L(done) + +L(loop_setup): + /* The last dword we want to read in the loop below is the one + containing the first byte of the string, ie. the dword at + s & ~7, or r0. The first dword read is at r8 - 8, we + read 2 * cnt dwords, so the last dword read will be at + r8 - 8 - 16 * cnt + 8. Solving for cnt gives + cnt = (r8 - r0) / 16 */ + sub r5,r8,r0 + addi r8,r8,-8 + srdi r9,r5,4 /* Number of loop iterations. */ + mtctr r9 /* Setup the counter. */ + + /* Main loop to look for BYTE backwards in the string. + FIXME: Investigate whether 32 byte align helps with this + 9 instruction loop. */ + .align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 + ldx r11,r8,r6 +#else + ldbrx r12,0,r8 + ldbrx r11,r8,r6 +#endif + cmpb r3,r12,r4 + cmpb r9,r11,r4 + or r5,r9,r3 /* Merge everything in one doubleword. */ + cmpldi cr7,r5,0 + bne cr7,L(found) + addi r8,r8,-16 + bdnz L(loop) + + /* We may have one more word to read. */ + cmpld r8,r0 + bnelr + +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 +#else + ldbrx r12,0,r8 +#endif + cmpb r3,r12,r4 + cmpldi cr7,r3,0 + bne cr7,L(done) + blr + + .align 4 +L(found): + /* OK, one (or both) of the dwords contains BYTE. Check + the first dword. */ + cmpldi cr6,r3,0 + bne cr6,L(done) + + /* BYTE must be in the second word. Adjust the address + again and move the result of cmpb to r3 so we can calculate the + pointer. */ + + mr r3,r9 + addi r8,r8,-8 + + /* r3 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as BYTE in the original + word from the string. Use that to calculate the pointer. + We need to make sure BYTE is *before* the end of the + range. */ +L(done): + cntlzd r9,r3 /* Count leading zeros before the match. */ + cmpld r8,r0 /* Are we on the last word? */ + srdi r6,r9,3 /* Convert leading zeros to bytes. */ + addi r0,r6,-7 + sub r3,r8,r0 + cmpld cr7,r3,r10 + bnelr + bgelr cr7 + li r3,0 + blr + + .align 4 +L(null): + li r3,0 + blr + +/* Deals with size <= 32. */ + .align 4 +L(small_range): + cmpldi r5,0 + beq L(null) + +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 +#else + ldbrx r12,0,r8 /* Load reversed doubleword from memory. */ +#endif + cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ + and r3,r3,r9 + cmpldi cr7,r3,0 + bne cr7,L(done) + + /* Are we done already? */ + cmpld r8,r0 + addi r8,r8,-8 + beqlr + + .align 5 +L(loop_small): +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 +#else + ldbrx r12,0,r8 +#endif + cmpb r3,r12,r4 + cmpld r8,r0 + cmpldi cr7,r3,0 + bne cr7,L(done) + addi r8,r8,-8 + bne L(loop_small) + blr + +END (MEMRCHR) +weak_alias (__memrchr, memrchr) +libc_hidden_builtin_def (memrchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memset.S new file mode 100644 index 0000000000..21933c0672 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memset.S @@ -0,0 +1,399 @@ +/* Optimized memset implementation for PowerPC64/POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + .machine power7 +EALIGN (MEMSET, 5, 0) + CALL_MCOUNT 3 + +L(_memset): + cmpldi cr7,5,31 + cmpldi cr6,5,8 + mr 10,3 + + /* Replicate byte to word. */ + insrdi 4,4,8,48 + insrdi 4,4,16,32 + ble cr6,L(small) /* If length <= 8, use short copy code. */ + + neg 0,3 + ble cr7,L(medium) /* If length < 32, use medium copy code. */ + + andi. 11,10,7 /* Check alignment of SRC. */ + insrdi 4,4,32,0 /* Replicate word to double word. */ + + mr 12,5 + beq L(big_aligned) + + clrldi 0,0,61 + mtocrf 0x01,0 + subf 5,0,5 + + /* Get DST aligned to 8 bytes. */ +1: bf 31,2f + + stb 4,0(10) + addi 10,10,1 +2: bf 30,4f + + sth 4,0(10) + addi 10,10,2 +4: bf 29,L(big_aligned) + + stw 4,0(10) + addi 10,10,4 + + .align 4 +L(big_aligned): + + cmpldi cr5,5,255 + li 0,32 + dcbtst 0,10 + cmpldi cr6,4,0 + srdi 9,5,3 /* Number of full doublewords remaining. */ + crand 27,26,21 + mtocrf 0x01,9 + bt 27,L(huge) + + /* From this point on, we'll copy 32+ bytes and the value + isn't 0 (so we can't use dcbz). */ + + srdi 8,5,5 + clrldi 11,5,61 + cmpldi cr6,11,0 + cmpldi cr1,9,4 + mtctr 8 + + /* Copy 1~3 doublewords so the main loop starts + at a multiple of 32 bytes. */ + + bf 30,1f + + std 4,0(10) + std 4,8(10) + addi 10,10,16 + bf 31,L(big_loop) + + std 4,0(10) + addi 10,10,8 + mr 12,10 + blt cr1,L(tail_bytes) + b L(big_loop) + + .align 4 +1: /* Copy 1 doubleword. */ + bf 31,L(big_loop) + + std 4,0(10) + addi 10,10,8 + + /* Main aligned copy loop. Copies 32-bytes at a time and + ping-pong through r10 and r12 to avoid AGEN delays. */ + .align 4 +L(big_loop): + addi 12,10,32 + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + bdz L(tail_bytes) + + addi 10,10,64 + std 4,0(12) + std 4,8(12) + std 4,16(12) + std 4,24(12) + bdnz L(big_loop) + + mr 12,10 + b L(tail_bytes) + + .align 4 +L(tail_bytes): + + /* Check for tail bytes. */ + beqlr cr6 + + clrldi 0,5,61 + mtocrf 0x01,0 + + /* At this point we have a tail of 0-7 bytes and we know that the + destination is doubleword-aligned. */ +4: /* Copy 4 bytes. */ + bf 29,2f + + stw 4,0(12) + addi 12,12,4 +2: /* Copy 2 bytes. */ + bf 30,1f + + sth 4,0(12) + addi 12,12,2 +1: /* Copy 1 byte. */ + bflr 31 + + stb 4,0(12) + blr + + /* Special case when value is 0 and we have a long length to deal + with. Use dcbz to zero out 128-bytes at a time. Before using + dcbz though, we need to get the destination 128-bytes aligned. */ + .align 4 +L(huge): + andi. 11,10,127 + neg 0,10 + beq L(huge_aligned) + + clrldi 0,0,57 + subf 5,0,5 + srdi 0,0,3 + mtocrf 0x01,0 + + /* Get DST aligned to 128 bytes. */ +8: bf 28,4f + + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + std 4,32(10) + std 4,40(10) + std 4,48(10) + std 4,56(10) + addi 10,10,64 + .align 4 +4: bf 29,2f + + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + addi 10,10,32 + .align 4 +2: bf 30,1f + + std 4,0(10) + std 4,8(10) + addi 10,10,16 + .align 4 +1: bf 31,L(huge_aligned) + + std 4,0(10) + addi 10,10,8 + + +L(huge_aligned): + srdi 8,5,7 + clrldi 11,5,57 + cmpldi cr6,11,0 + mtctr 8 + + .align 4 +L(huge_loop): + dcbz 0,10 + addi 10,10,128 + bdnz L(huge_loop) + + /* Check how many bytes are still left. */ + beqlr cr6 + + subf 9,3,10 + subf 5,9,12 + srdi 8,5,3 + cmpldi cr6,8,0 + mtocrf 0x01,8 + + /* We have a tail o 1~127 bytes. Copy up to 15 doublewords for + speed. We'll handle the resulting tail bytes later. */ + beq cr6,L(tail) + +8: bf 28,4f + + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + std 4,32(10) + std 4,40(10) + std 4,48(10) + std 4,56(10) + addi 10,10,64 + .align 4 +4: bf 29,2f + + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + addi 10,10,32 + .align 4 +2: bf 30,1f + + std 4,0(10) + std 4,8(10) + addi 10,10,16 + .align 4 +1: bf 31,L(tail) + + std 4,0(10) + addi 10,10,8 + + /* Handle the rest of the tail bytes here. */ +L(tail): + mtocrf 0x01,5 + + .align 4 +4: bf 29,2f + + stw 4,0(10) + addi 10,10,4 + .align 4 +2: bf 30,1f + + sth 4,0(10) + addi 10,10,2 + .align 4 +1: bflr 31 + + stb 4,0(10) + blr + + /* Expanded tree to copy tail bytes without increments. */ + .align 4 +L(copy_tail): + bf 29,L(FXX) + + stw 4,0(10) + bf 30,L(TFX) + + sth 4,4(10) + bflr 31 + + stb 4,6(10) + blr + + .align 4 +L(FXX): bf 30,L(FFX) + + sth 4,0(10) + bflr 31 + + stb 4,2(10) + blr + + .align 4 +L(TFX): bflr 31 + + stb 4,4(10) + blr + + .align 4 +L(FFX): bflr 31 + + stb 4,0(10) + blr + + /* Handle copies of 9~31 bytes. */ + .align 4 +L(medium): + /* At least 9 bytes to go. */ + andi. 11,10,3 + clrldi 0,0,62 + beq L(medium_aligned) + + /* Force 4-bytes alignment for DST. */ + mtocrf 0x01,0 + subf 5,0,5 +1: /* Copy 1 byte. */ + bf 31,2f + + stb 4,0(10) + addi 10,10,1 +2: /* Copy 2 bytes. */ + bf 30,L(medium_aligned) + + sth 4,0(10) + addi 10,10,2 + + .align 4 +L(medium_aligned): + /* At least 6 bytes to go, and DST is word-aligned. */ + cmpldi cr1,5,16 + mtocrf 0x01,5 + blt cr1,8f + + /* Copy 16 bytes. */ + stw 4,0(10) + stw 4,4(10) + stw 4,8(10) + stw 4,12(10) + addi 10,10,16 +8: /* Copy 8 bytes. */ + bf 28,4f + + stw 4,0(10) + stw 4,4(10) + addi 10,10,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + stw 4,0(10) + addi 10,10,4 +2: /* Copy 2-3 bytes. */ + bf 30,1f + + sth 4,0(10) + addi 10,10,2 +1: /* Copy 1 byte. */ + bflr 31 + + stb 4,0(10) + blr + + /* Handles copies of 0~8 bytes. */ + .align 4 +L(small): + mtocrf 0x01,5 + bne cr6,L(copy_tail) + + stw 4,0(10) + stw 4,4(10) + blr + +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. */ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END (__bzero) +#ifndef __bzero +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/multiarch/Implies new file mode 100644 index 0000000000..bf5d6171a5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/rawmemchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/rawmemchr.S new file mode 100644 index 0000000000..48afb75943 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/rawmemchr.S @@ -0,0 +1,115 @@ +/* Optimized rawmemchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] rawmemchr (void *s [r3], int c [r4]) */ + +#ifndef RAWMEMCHR +# define RAWMEMCHR __rawmemchr +#endif + .machine power7 +ENTRY (RAWMEMCHR) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* Now r4 has a doubleword of c bytes. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r5,r12,r4 /* Compare each byte against c byte. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 /* Move left to discard ignored bits. */ + srd r5,r5,r6 /* Bring the bits back as zeros. */ +#endif + cmpdi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r4 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r4 + cmpb r6,r11,r4 + or r7,r5,r6 + cmpdi cr7,r7,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a 'c' byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The 'c' byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + mr r5,r6 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the 'c' byte in the original + doubleword from the string. Use that fact to find out what is + the position of the byte inside the string. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 /* Count trailing zeros. */ +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching char. */ + blr +END (RAWMEMCHR) +weak_alias (__rawmemchr,rawmemchr) +libc_hidden_builtin_def (__rawmemchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/stpncpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/stpncpy.S new file mode 100644 index 0000000000..a346dd7e28 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/stpncpy.S @@ -0,0 +1,24 @@ +/* Optimized stpncpy implementation for PowerPC64/POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STPNCPY +#include <sysdeps/powerpc/powerpc64/power7/strncpy.S> + +weak_alias (__stpncpy, stpncpy) +libc_hidden_def (__stpncpy) +libc_hidden_builtin_def (stpncpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp.S new file mode 100644 index 0000000000..e856b8a593 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp.S @@ -0,0 +1,126 @@ +/* Optimized strcasecmp implementation for PowerPC64. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <locale-defines.h> + +/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) + + or if defined USE_IN_EXTENDED_LOCALE_MODEL: + + int [r3] strcasecmp_l (const char *s1 [r3], const char *s2 [r4], + __locale_t loc [r5]) */ + +#ifndef STRCMP +# define __STRCMP __strcasecmp +# define STRCMP strcasecmp +#endif + +ENTRY (__STRCMP) +#ifndef USE_IN_EXTENDED_LOCALE_MODEL + CALL_MCOUNT 2 +#else + CALL_MCOUNT 3 +#endif + +#define rRTN r3 /* Return value */ +#define rSTR1 r5 /* 1st string */ +#define rSTR2 r4 /* 2nd string */ +#define rLOCARG r5 /* 3rd argument: locale_t */ +#define rCHAR1 r6 /* Byte read from 1st string */ +#define rCHAR2 r7 /* Byte read from 2nd string */ +#define rADDR1 r8 /* Address of tolower(rCHAR1) */ +#define rADDR2 r12 /* Address of tolower(rCHAR2) */ +#define rLWR1 r8 /* Word tolower(rCHAR1) */ +#define rLWR2 r12 /* Word tolower(rCHAR2) */ +#define rTMP r9 +#define rLOC r11 /* Default locale address */ + + cmpd cr7, r3, r4 +#ifndef USE_IN_EXTENDED_LOCALE_MODEL + ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) + add rLOC, rTMP, __libc_tsd_LOCALE@tls + ld rLOC, 0(rLOC) +#else + mr rLOC, rLOCARG +#endif + ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) + mr rSTR1, rRTN + li rRTN, 0 + beqlr cr7 + + + /* Unrolling loop for POWER: loads are done with 'lbz' plus + offset and string descriptors are only updated in the end + of loop unrolling. */ + + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ +L(loop): + cmpdi rCHAR1, 0 /* *s1 == '\0' ? */ + sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ + sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ + lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ + lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ + cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */ + crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 != '\0') || (r == 1) */ + beq cr1, L(done) + lbz rCHAR1, 1(rSTR1) + lbz rCHAR2, 1(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 2(rSTR1) + lbz rCHAR2, 2(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 3(rSTR1) + lbz rCHAR2, 3(rSTR2) + cmpdi rCHAR1, 0 + /* Increment both string descriptors */ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1,L(done) + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ + b L(loop) +L(done): + subf r0, rLWR2, rLWR1 + extsw rRTN, r0 + blr +END (__STRCMP) + +weak_alias (__STRCMP, STRCMP) +libc_hidden_builtin_def (__STRCMP) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S new file mode 100644 index 0000000000..c13c4ebcb8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S @@ -0,0 +1,5 @@ +#define USE_IN_EXTENDED_LOCALE_MODEL +#define STRCMP strcasecmp_l +#define __STRCMP __strcasecmp_l + +#include "strcasecmp.S" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchr.S new file mode 100644 index 0000000000..a18e2e101c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchr.S @@ -0,0 +1,230 @@ +/* Optimized strchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRCHR +# define STRCHR strchr +#endif + +/* int [r3] strchr (char *s [r3], int c [r4]) */ + .machine power7 +ENTRY (STRCHR) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r9,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r9,r4 + cmpb r7,r9,r0 + or r12,r10,r11 + or r9,r6,r7 + or r5,r12,r9 + cmpdi cr7,r5,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + + cmpdi cr6,r12,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 + + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r3,r10,-1 + andc r3,r3,r10 + popcntd r0,r3 + addi r4,r11,-1 + andc r4,r4,r11 + cmpld cr7,r3,r4 + bgt cr7,L(no_match) +#else + cntlzd r0,r10 /* Count leading zeros before c matches. */ + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + blr + + .align 4 +L(no_match): + li r3,0 + blr + +/* We are here because strchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(loop_null) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +END (STRCHR) +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchrnul.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchrnul.S new file mode 100644 index 0000000000..27bc1f0682 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchrnul.S @@ -0,0 +1,131 @@ +/* Optimized strchrnul implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRCHRNUL +# define STRCHRNUL __strchrnul +#endif +/* int [r3] strchrnul (char *s [r3], int c [r4]) */ + .machine power7 +ENTRY (STRCHRNUL) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r0 /* Compare each byte against c byte. */ + cmpb r9,r12,r4 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and to bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r9,r9,r6 + sld r10,r10,r6 + sld r9,r9,r6 +#else + sld r10,r10,r6 + sld r9,r9,r6 + srd r10,r10,r6 + srd r9,r9,r6 +#endif + or r5,r9,r10 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r0 + cmpb r9,r12,r4 + or r5,r9,r10 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r10,r12,r0 + cmpb r9,r12,r4 + cmpb r6,r11,r0 + cmpb r7,r11,r4 + or r5,r9,r10 + or r10,r6,r7 + or r11,r5,r10 + cmpdi cr7,r11,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r5 so we can calculate + the pointer. */ + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of matching c/null byte. */ + blr +END (STRCHRNUL) +weak_alias (STRCHRNUL, strchrnul) +libc_hidden_builtin_def (STRCHRNUL) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcmp.S new file mode 100644 index 0000000000..14e14f457e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcmp.S @@ -0,0 +1,168 @@ +/* Optimized strcmp implementation for Power7 using 'cmpb' instruction + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The optimization is achieved here through cmpb instruction. + 8byte aligned strings are processed with double word comparision + and unaligned strings are handled effectively with loop unrolling + technique */ + +#include <sysdep.h> + +#ifndef STRCMP +# define STRCMP strcmp +#endif + +/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */ + + .machine power7 +EALIGN (STRCMP, 4, 0) + CALL_MCOUNT 2 + + or r9, r3, r4 + rldicl. r10, r9, 0, 61 /* are s1 and s2 8 byte aligned..? */ + bne cr0, L(process_unaligned_bytes) + li r5, 0 + + .align 4 +/* process input parameters on double word aligned boundary */ +L(unrollDword): + ld r8,0(r3) + ld r10,0(r4) + cmpb r7,r8,r5 + cmpdi cr7,r7,0 + mr r9,r7 + bne cr7,L(null_found) + cmpld cr7,r8,r10 + bne cr7,L(different) + + ld r8,8(r3) + ld r10,8(r4) + cmpb r7,r8,r5 + cmpdi cr7,r7,0 + mr r9,r7 + bne cr7,L(null_found) + cmpld cr7,r8,r10 + bne cr7,L(different) + + ld r8,16(r3) + ld r10,16(r4) + cmpb r7,r8,r5 + cmpdi cr7,r7,0 + mr r9,r7 + bne cr7,L(null_found) + cmpld cr7,r8,r10 + bne cr7,L(different) + + ld r8,24(r3) + ld r10,24(r4) + cmpb r7,r8,r5 + cmpdi cr7,r7,0 + mr r9,r7 + bne cr7,L(null_found) + cmpld cr7,r8,r10 + bne cr7,L(different) + + addi r3, r3, 32 + addi r4, r4, 32 + beq cr7, L(unrollDword) + + .align 4 +L(null_found): +#ifdef __LITTLE_ENDIAN__ + neg r7,r9 + and r9,r9,r7 + li r7,-1 + cntlzd r9,r9 + subfic r9,r9,71 + sld r9,r7,r9 +#else + cntlzd r9,r9 + li r7,-1 + addi r9,r9,8 + srd r9,r7,r9 +#endif + or r8,r8,r9 + or r10,r10,r9 + +L(different): + cmpb r9,r8,r10 +#ifdef __LITTLE_ENDIAN__ + addi r7,r9,1 + andc r9,r7,r9 + cntlzd r9,r9 + subfic r9,r9,63 +#else + not r9,r9 + cntlzd r9,r9 + subfic r9,r9,56 +#endif + srd r3,r8,r9 + srd r10,r10,r9 + rldicl r10,r10,0,56 + rldicl r3,r3,0,56 + subf r3,r10,r3 + blr + + .align 4 +L(process_unaligned_bytes): + lbz r9, 0(r3) /* load byte from s1 */ + lbz r10, 0(r4) /* load byte from s2 */ + cmpdi cr7, r9, 0 /* compare *s1 with NULL */ + beq cr7, L(diffOfNULL) /* if *s1 is NULL , return *s1 - *s2 */ + cmplw cr7, r9, r10 /* compare *s1 and *s2 */ + bne cr7, L(ComputeDiff) /* branch to compute difference and return */ + + lbz r9, 1(r3) /* load next byte from s1 */ + lbz r10, 1(r4) /* load next byte from s2 */ + cmpdi cr7, r9, 0 /* compare *s1 with NULL */ + beq cr7, L(diffOfNULL) /* if *s1 is NULL , return *s1 - *s2 */ + cmplw cr7, r9, r10 /* compare *s1 and *s2 */ + bne cr7, L(ComputeDiff) /* branch to compute difference and return */ + + lbz r9, 2(r3) /* unroll 3rd byte here */ + lbz r10, 2(r4) + cmpdi cr7, r9, 0 + beq cr7, L(diffOfNULL) + cmplw cr7, r9, r10 + bne 7, L(ComputeDiff) + + lbz r9, 3(r3) /* unroll 4th byte now */ + lbz r10, 3(r4) + addi r3, r3, 4 /* increment s1 by unroll factor */ + cmpdi cr7, r9, 0 + cmplw cr6, 9, r10 + beq cr7, L(diffOfNULL) + addi r4, r4, 4 /* increment s2 by unroll factor */ + beq cr6, L(process_unaligned_bytes) /* unroll byte processing */ + + .align 4 +L(ComputeDiff): + extsw r9, r9 + subf r10, r10, r9 /* compute s1 - s2 */ + extsw r3, r10 + blr /* return */ + + .align 4 +L(diffOfNULL): + li r9, 0 + subf r10, r10, r9 /* compute s1 - s2 */ + extsw r3, r10 /* sign extend result */ + blr /* return */ + +END (STRCMP) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strlen.S new file mode 100644 index 0000000000..63848c460c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strlen.S @@ -0,0 +1,107 @@ +/* Optimized strlen implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strlen (char *s [r3]) */ + +#ifndef STRLEN +# define STRLEN strlen +#endif + .machine power7 +ENTRY (STRLEN) + CALL_MCOUNT 1 + dcbt 0,r3 + clrrdi r4,r3,3 /* Align the address to doubleword boundary. */ + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + ld r12,0(r4) /* Load doubleword from memory. */ +#ifdef __LITTLE_ENDIAN__ + sld r5,r5,r6 +#else + srd r5,r5,r6 /* MASK = MASK >> padding. */ +#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r4 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r4) + cmpb r10,r12,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + + ld r12, 8(r4) + ldu r11, 16(r4) + cmpb r10,r12,r0 + cmpb r9,r11,r0 + or r8,r9,r10 /* Merge everything in one doubleword. */ + cmpdi cr7,r8,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r10,0 + addi r4,r4,-8 + bne cr6,L(done) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r9 + addi r4,r4,8 + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the length. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r9, r10, -1 /* Form a mask from trailing zeros. */ + andc r9, r9, r10 + popcntd r0, r9 /* Count the bits in the mask. */ +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r4 + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r5,r0 /* Compute final length. */ + blr +END (STRLEN) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncmp.S new file mode 100644 index 0000000000..d53b31be8e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncmp.S @@ -0,0 +1,227 @@ +/* Optimized strcmp implementation for POWER7/PowerPC64. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +EALIGN (STRNCMP,5,0) + CALL_MCOUNT 3 + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r10 +#define rWORD4 r11 +#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + nop + or rTMP,rSTR2,rSTR1 + lis r7F7F,0x7f7f + dcbt 0,rSTR2 + nop + clrldi. rTMP,rTMP,61 + cmpldi cr1,rN,0 + lis rFEFE,-0x101 + bne L(unaligned) +/* We are doubleword aligned so set up for two loops. first a double word + loop, then fall into the byte loop if any residual. */ + srdi. rTMP,rN,3 + clrldi rN,rN,61 + addi rFEFE,rFEFE,-0x101 + addi r7F7F,r7F7F,0x7f7f + cmpldi cr1,rN,0 + beq L(unaligned) + + mtctr rTMP + ld rWORD1,0(rSTR1) + ld rWORD2,0(rSTR2) + sldi rTMP,rFEFE,32 + insrdi r7F7F,r7F7F,32,0 + add rFEFE,rFEFE,rTMP + b L(g1) + +L(g0): + ldu rWORD1,8(rSTR1) + bne cr1,L(different) + ldu rWORD2,8(rSTR2) +L(g1): add rTMP,rFEFE,rWORD1 + nor rNEG,r7F7F,rWORD1 + bdz L(tail) + and. rTMP,rTMP,rNEG + cmpd cr1,rWORD1,rWORD2 + beq L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + beq cr1, L(equal) + andc rTMP2, rTMP2, rTMP + rldimi rTMP2, rTMP2, 1, 0 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + cmpd cr1, rWORD1, rWORD2 + beq cr1, L(equal) + cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */ + addi rNEG, rBITDIF, 1 + orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */ + sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */ + andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */ + andc rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt L(highbit) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */ + addi rNEG, rBITDIF, 1 + orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */ + sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */ + andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */ + andc rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr + +#else +L(endstring): + and rTMP,r7F7F,rWORD1 + beq cr1,L(equal) + add rTMP,rTMP,r7F7F + xor. rBITDIF,rWORD1,rWORD2 + andc rNEG,rNEG,rTMP + blt L(highbit) + cntlzd rBITDIF,rBITDIF + cntlzd rNEG,rNEG + addi rNEG,rNEG,7 + cmpd cr1,rNEG,rBITDIF + sub rRTN,rWORD1,rWORD2 + blt cr1,L(equal) + sradi rRTN,rRTN,63 /* must return an int. */ + ori rRTN,rRTN,1 + blr +L(equal): + li rRTN,0 + blr + +L(different): + ld rWORD1,-8(rSTR1) + xor. rBITDIF,rWORD1,rWORD2 + sub rRTN,rWORD1,rWORD2 + blt L(highbit) + sradi rRTN,rRTN,63 + ori rRTN,rRTN,1 + blr +L(highbit): + sradi rRTN,rWORD2,63 + ori rRTN,rRTN,1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. rTMP,rTMP,rNEG + cmpd cr1,rWORD1,rWORD2 + bne L(endstring) + addi rSTR1,rSTR1,8 + bne cr1,L(different) + addi rSTR2,rSTR2,8 + cmpldi cr1,rN,0 +L(unaligned): + mtctr rN + ble cr1,L(ux) +L(uz): + lbz rWORD1,0(rSTR1) + lbz rWORD2,0(rSTR2) + .align 4 +L(u1): + cmpdi cr1,rWORD1,0 + bdz L(u4) + cmpd rWORD1,rWORD2 + beq cr1,L(u4) + bne L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + cmpdi cr1,rWORD3,0 + bdz L(u3) + cmpd rWORD3,rWORD4 + beq cr1,L(u3) + bne L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + cmpdi cr1,rWORD1,0 + bdz L(u4) + cmpd rWORD1,rWORD2 + beq cr1,L(u4) + bne L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + cmpdi cr1,rWORD3,0 + bdz L(u3) + cmpd rWORD3,rWORD4 + beq cr1,L(u3) + bne L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + b L(u1) + +L(u3): sub rRTN,rWORD3,rWORD4 + blr +L(u4): sub rRTN,rWORD1,rWORD2 + blr +L(ux): + li rRTN,0 + blr +END (STRNCMP) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncpy.S new file mode 100644 index 0000000000..0224f74898 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncpy.S @@ -0,0 +1,722 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Implements the functions + + char * [r3] strncpy (char *dst [r3], const char *src [r4], size_t n [r5]) + + AND + + char * [r3] stpncpy (char *dst [r3], const char *src [r4], size_t n [r5]) + + The algorithm is as follows: + > if src and dest are 8 byte aligned, perform double word copy + else + > copy byte by byte on unaligned addresses. + + The aligned comparison are made using cmpb instructions. */ + +/* The focus on optimization for performance improvements are as follows: + 1. data alignment [gain from aligned memory access on read/write] + 2. POWER7 gains performance with loop unrolling/unwinding + [gain by reduction of branch penalty]. + 3. The final pad with null bytes is done by calling an optimized + memset. */ + +#ifdef USE_AS_STPNCPY +# ifndef STPNCPY +# define FUNC_NAME __stpncpy +# else +# define FUNC_NAME STPNCPY +# endif +#else +# ifndef STRNCPY +# define FUNC_NAME strncpy +# else +# define FUNC_NAME STRNCPY +# endif +#endif /* !USE_AS_STPNCPY */ + +#define FRAMESIZE (FRAME_MIN_SIZE+32) + +#ifndef MEMSET +/* For builds with no IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define MEMSET __GI_memset +# else +# define MEMSET memset +# endif +#endif + + .machine power7 +EALIGN(FUNC_NAME, 4, 0) + CALL_MCOUNT 3 + + mflr r0 /* load link register LR to r0 */ + or r10, r3, r4 /* to verify source and destination */ + rldicl. r8, r10, 0, 61 /* is double word aligned .. ? */ + + std r19, -8(r1) /* save callers register , r19 */ + std r18, -16(r1) /* save callers register , r18 */ + std r0, 16(r1) /* store the link register */ + stdu r1, -FRAMESIZE(r1) /* create the stack frame */ + + mr r9, r3 /* save r3 into r9 for use */ + mr r18, r3 /* save r3 for retCode of strncpy */ + bne 0, L(unaligned) + +L(aligned): + srdi r11, r5, 3 /* compute count for CTR ; count = n/8 */ + cmpldi cr7, r11, 3 /* if count > 4 ; perform unrolling 4 times */ + ble 7, L(update1) + + ld r10, 0(r4) /* load doubleWord from src */ + cmpb r8, r10, r8 /* compare src with NULL ,we read just now */ + cmpdi cr7, r8, 0 /* if cmpb returned NULL ; we continue */ + bne cr7, L(update3) + + std r10, 0(r3) /* copy doubleword at offset=0 */ + ld r10, 8(r4) /* load next doubleword from offset=8 */ + cmpb r8, r10, r8 /* compare src with NULL , we read just now */ + cmpdi cr7, r8, 0 /* if cmpb returned NULL ; we continue */ + bne 7,L(HopBy8) + + addi r8, r11, -4 + mr r7, r3 + srdi r8, r8, 2 + mr r6, r4 + addi r8, r8, 1 + li r12, 0 + mtctr r8 + b L(dwordCopy) + + .p2align 4 +L(dWordUnroll): + std r8, 16(r9) + ld r8, 24(r4) /* load dword,perform loop unrolling again */ + cmpb r10, r8, r10 + cmpdi cr7, r10, 0 + bne cr7, L(HopBy24) + + std r8, 24(r7) /* copy dword at offset=24 */ + addi r9, r9, 32 + addi r4, r4, 32 + bdz L(leftDwords) /* continue with loop on counter */ + + ld r3, 32(r6) + cmpb r8, r3, r10 + cmpdi cr7, r8, 0 + bne cr7, L(update2) + + std r3, 32(r7) + ld r10, 40(r6) + cmpb r8, r10, r8 + cmpdi cr7, r8, 0 + bne cr7, L(HopBy40) + + mr r6, r4 /* update values */ + mr r7, r9 + mr r11, r0 + mr r5, r19 + +L(dwordCopy): + std r10, 8(r9) /* copy dword at offset=8 */ + addi r19, r5, -32 + addi r0, r11, -4 + ld r8, 16(r4) + cmpb r10, r8, r12 + cmpdi cr7, r10, 0 + beq cr7, L(dWordUnroll) + + addi r9, r9, 16 /* increment dst by 16 */ + addi r4, r4, 16 /* increment src by 16 */ + addi r5, r5, -16 /* decrement length 'n' by 16 */ + addi r0, r11, -2 /* decrement loop counter */ + +L(dWordUnrollOFF): + ld r10, 0(r4) /* load first dword */ + li r8, 0 /* load mask */ + cmpb r8, r10, r8 + cmpdi cr7, r8, 0 + bne cr7, L(byte_by_byte) + mtctr r0 + li r7, 0 + b L(CopyDword) + + .p2align 4 +L(loadDWordandCompare): + ld r10, 0(r4) + cmpb r8, r10, r7 + cmpdi cr7, r8, 0 + bne cr7, L(byte_by_byte) + +L(CopyDword): + addi r9, r9, 8 + std r10, -8(r9) + addi r4, r4, 8 + addi r5, r5, -8 + bdnz L(loadDWordandCompare) + +L(byte_by_byte): + cmpldi cr7, r5, 3 + ble cr7, L(verifyByte) + srdi r10, r5, 2 + mr r19, r9 + mtctr r10 + b L(firstByteUnroll) + + .p2align 4 +L(bytes_unroll): + lbz r10, 1(r4) /* load byte from src */ + cmpdi cr7, r10, 0 /* compare for NULL */ + stb r10, 1(r19) /* store byte to dst */ + beq cr7, L(updtDestComputeN2ndByte) + + addi r4, r4, 4 /* advance src */ + + lbz r10, -2(r4) /* perform loop unrolling for byte r/w */ + cmpdi cr7, r10, 0 + stb r10, 2(r19) + beq cr7, L(updtDestComputeN3rdByte) + + lbz r10, -1(r4) /* perform loop unrolling for byte r/w */ + addi r19, r19, 4 + cmpdi cr7, r10, 0 + stb r10, -1(r19) + beq cr7, L(ComputeNByte) + + bdz L(update0) + +L(firstByteUnroll): + lbz r10, 0(r4) /* perform loop unrolling for byte r/w */ + cmpdi cr7, 10, 0 + stb r10, 0(r19) + bne cr7, L(bytes_unroll) + addi r19, r19, 1 + +L(ComputeNByte): + subf r9, r19, r9 /* compute 'n'n bytes to fill */ + add r8, r9, r5 + +L(zeroFill): + cmpdi cr7, r8, 0 /* compare if length is zero */ + beq cr7, L(update3return) + + mr r3, r19 /* fill buffer with */ + li r4, 0 /* zero fill buffer */ + mr r5, r8 /* how many bytes to fill buffer with */ + bl MEMSET /* call optimized memset */ + nop + +L(update3return): +#ifdef USE_AS_STPNCPY + addi r3, r19, -1 /* update return value */ +#endif + +L(hop2return): +#ifndef USE_AS_STPNCPY + mr r3, r18 /* set return value */ +#endif + addi r1, r1, FRAMESIZE /* restore stack pointer */ + ld r0, 16(r1) /* read the saved link register */ + ld r18, -16(r1) /* restore callers save register, r18 */ + ld r19, -8(r1) /* restore callers save register, r19 */ + mtlr r0 /* branch to link register */ + blr /* return */ + + .p2align 4 +L(update0): + mr r9, r19 + + .p2align 4 +L(verifyByte): + rldicl. r8, r5, 0, 62 +#ifdef USE_AS_STPNCPY + mr r3, r9 +#endif + beq cr0, L(hop2return) + mtctr r8 + addi r4, r4, -1 + mr r19, r9 + b L(oneBYone) + + .p2align 4 +L(proceed): + bdz L(done) + +L(oneBYone): + lbzu r10, 1(r4) /* copy byte */ + addi r19, r19, 1 + addi r8, r8, -1 + cmpdi cr7, r10, 0 + stb r10, -1(r19) + bne cr7, L(proceed) + b L(zeroFill) + + .p2align 4 +L(done): + addi r1, r1, FRAMESIZE /* restore stack pointer */ +#ifdef USE_AS_STPNCPY + mr r3, r19 /* set the return value */ +#else + mr r3, r18 /* set the return value */ +#endif + ld r0, 16(r1) /* read the saved link register */ + ld r18, -16(r1) /* restore callers save register, r18 */ + ld r19, -8(r1) /* restore callers save register, r19 */ + mtlr r0 /* branch to link register */ + blr /* return */ + +L(update1): + mr r0, r11 + mr r19, r5 + + .p2align 4 +L(leftDwords): + cmpdi cr7, r0, 0 + mr r5, r19 + bne cr7, L(dWordUnrollOFF) + b L(byte_by_byte) + + .p2align 4 +L(updtDestComputeN2ndByte): + addi r19, r19, 2 /* update dst by 2 */ + subf r9, r19, r9 /* compute distance covered */ + add r8, r9, r5 + b L(zeroFill) + + .p2align 4 +L(updtDestComputeN3rdByte): + addi r19, r19, 3 /* update dst by 3 */ + subf r9, r19, r9 /* compute distance covered */ + add r8, r9, r5 + b L(zeroFill) + + .p2align 4 +L(HopBy24): + addi r9, r9, 24 /* increment dst by 24 */ + addi r4, r4, 24 /* increment src by 24 */ + addi r5, r5, -24 /* decrement length 'n' by 24 */ + addi r0, r11, -3 /* decrement loop counter */ + b L(dWordUnrollOFF) + + .p2align 4 +L(update2): + mr r5, r19 + b L(dWordUnrollOFF) + + .p2align 4 +L(HopBy40): + addi r9, r7, 40 /* increment dst by 40 */ + addi r4, r6, 40 /* increment src by 40 */ + addi r5, r5, -40 /* decrement length 'n' by 40 */ + addi r0, r11, -5 /* decrement loop counter */ + b L(dWordUnrollOFF) + +L(update3): + mr r0, r11 + b L(dWordUnrollOFF) + +L(HopBy8): + addi r9, r3, 8 /* increment dst by 8 */ + addi r4, r4, 8 /* increment src by 8 */ + addi r5, r5, -8 /* decrement length 'n' by 8 */ + addi r0, r11, -1 /* decrement loop counter */ + b L(dWordUnrollOFF) + +L(unaligned): + cmpdi r5, 16 /* Proceed byte by byte for less than 16 */ + ble L(byte_by_byte) + rldicl r7, r3, 0, 61 + rldicl r6, r4, 0, 61 + cmpdi r6, 0 /* Check src alignment */ + beq L(srcaligndstunalign) + /* src is unaligned */ + rlwinm r10, r4, 3,26,28 /* Calculate padding. */ + clrrdi r4, r4, 3 /* Align the addr to dw boundary */ + ld r8, 0(r4) /* Load doubleword from memory. */ + li r0, 0 + /* Discard bits not part of the string */ +#ifdef __LITTLE_ENDIAN__ + srd r7, r8, r10 +#else + sld r7, r8, r10 +#endif + cmpb r0, r7, r0 /* Compare each byte against null */ + /* Discard bits not part of the string */ +#ifdef __LITTLE_ENDIAN__ + sld r0, r0, r10 +#else + srd r0, r0, r10 +#endif + cmpdi r0, 0 + bne L(bytebybyte) /* if it has null, copy byte by byte */ + subfic r6, r6, 8 + rlwinm r12, r3, 3,26,28 /* Calculate padding in bits. */ + rldicl r9, r3, 0, 61 /* Calculate padding in bytes. */ + addi r3, r3, -1 + + cmpdi r12, 0 /* check dest alignment */ + beq L(srcunaligndstalign) + + /* both src and dst unaligned */ +#ifdef __LITTLE_ENDIAN__ + sld r8, r7, r10 + mr r11, r10 + addi r11, r11, -8 /* Adjust byte pointer on loaded dw */ +#else + srd r8, r7, r10 + subfic r11, r10, 64 +#endif + /* dst alignment is greater then src alignment? */ + cmpd cr7, r12, r10 + ble cr7, L(dst_align_small) + /* src alignment is less than dst */ + + /* Calculate the dst alignment difference */ + subfic r7, r9, 8 + mtctr r7 + + /* Write until dst is aligned */ + cmpdi r0, r7, 4 + blt L(storebyte1) /* less than 4, store byte by byte */ + beq L(equal1) /* if its 4, store word */ + addi r0, r7, -4 /* greater than 4, so stb and stw */ + mtctr r0 +L(storebyte1): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 /* Adjust byte pointer on loaded dw */ +#else + addi r11, r11, -8 +#endif + srd r7, r8, r11 + stbu r7, 1(r3) + addi r5, r5, -1 + bdnz L(storebyte1) + + subfic r7, r9, 8 /* Check the remaining bytes */ + cmpdi r0, r7, 4 + blt L(proceed1) + + .align 4 +L(equal1): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 /* Adjust byte pointer on loaded dw */ + srd r7, r8, r11 +#else + subfic r11, r11, 64 + sld r7, r8, r11 + srdi r7, r7, 32 +#endif + stw r7, 1(r3) + addi r3, r3, 4 + addi r5, r5, -4 + +L(proceed1): + mr r7, r8 + /* calculate the Left over bytes to be written */ + subfic r11, r10, 64 + subfic r12, r12, 64 + subf r12, r12, r11 /* remaining bytes on second dw */ + subfic r10, r12, 64 /* remaining bytes on first dw */ + subfic r9, r9, 8 + subf r6, r9, r6 /* recalculate padding */ +L(srcunaligndstalign): + addi r3, r3, 1 + subfic r12, r10, 64 /* remaining bytes on second dw */ + addi r4, r4, 8 + li r0,0 + b L(storedouble) + + .align 4 +L(dst_align_small): + mtctr r6 + /* Write until src is aligned */ +L(storebyte2): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 /* Adjust byte pointer on dw */ +#else + addi r11, r11, -8 +#endif + srd r7, r8, r11 + stbu r7, 1(r3) + addi r5, r5, -1 + bdnz L(storebyte2) + + addi r4, r4, 8 /* Increment src pointer */ + addi r3, r3, 1 /* Increment dst pointer */ + mr r9, r3 + li r8, 0 + cmpd cr7, r12, r10 + beq cr7, L(aligned) + rldicl r6, r3, 0, 61 /* Recalculate padding */ + mr r7, r6 + + /* src is algined */ +L(srcaligndstunalign): + mr r9, r3 + mr r6, r7 + ld r8, 0(r4) + subfic r10, r7, 8 + mr r7, r8 + li r0, 0 /* Check null */ + cmpb r0, r8, r0 + cmpdi r0, 0 + bne L(byte_by_byte) /* Do byte by byte if there is NULL */ + rlwinm r12, r3, 3,26,28 /* Calculate padding */ + addi r3, r3, -1 + /* write byte by byte until aligned */ +#ifdef __LITTLE_ENDIAN__ + li r11, -8 +#else + li r11, 64 +#endif + mtctr r10 + cmpdi r0, r10, 4 + blt L(storebyte) + beq L(equal) + addi r0, r10, -4 + mtctr r0 +L(storebyte): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 /* Adjust byte pointer on dw */ +#else + addi r11, r11, -8 +#endif + srd r7, r8, r11 + stbu r7, 1(r3) + addi r5, r5, -1 + bdnz L(storebyte) + + cmpdi r0, r10, 4 + blt L(align) + + .align 4 +L(equal): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 + srd r7, r8, r11 +#else + subfic r11, r11, 64 + sld r7, r8, r11 + srdi r7, r7, 32 +#endif + stw r7, 1(r3) + addi r5, r5, -4 + addi r3, r3, 4 +L(align): + addi r3, r3, 1 + addi r4, r4, 8 /* Increment src pointer */ + subfic r10, r12, 64 + li r0, 0 + /* dst addr aligned to 8 */ +L(storedouble): + cmpdi r5, 8 + ble L(null1) + ld r7, 0(r4) /* load next dw */ + cmpb r0, r7, r0 + cmpdi r0, 0 /* check for null on each new dw */ + bne L(null) +#ifdef __LITTLE_ENDIAN__ + srd r9, r8, r10 /* bytes from first dw */ + sld r11, r7, r12 /* bytes from second dw */ +#else + sld r9, r8, r10 + srd r11, r7, r12 +#endif + or r11, r9, r11 /* make as a single dw */ + std r11, 0(r3) /* store as std on aligned addr */ + mr r8, r7 /* still few bytes left to be written */ + addi r3, r3, 8 /* increment dst addr */ + addi r4, r4, 8 /* increment src addr */ + addi r5, r5, -8 + b L(storedouble) /* Loop until NULL */ + + .align 4 + +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(null): + addi r3, r3, -1 + mr r10, r12 + mtctr r6 +#ifdef __LITTLE_ENDIAN__ + subfic r10, r10, 64 + addi r10, r10, -8 +#endif + cmpdi r0, r5, 4 + blt L(loop) + cmpdi r0, r6, 4 + blt L(loop) + + /* we can still use stw if leftover >= 4 */ +#ifdef __LITTLE_ENDIAN__ + addi r10, r10, 8 + srd r11, r8, r10 +#else + subfic r10, r10, 64 + sld r11, r8, r10 + srdi r11, r11, 32 +#endif + stw r11, 1(r3) + addi r5, r5, -4 + addi r3, r3, 4 + cmpdi r0, r5, 0 + beq L(g1) + cmpdi r0, r6, 4 + beq L(bytebybyte1) + addi r10, r10, 32 +#ifdef __LITTLE_ENDIAN__ + addi r10, r10, -8 +#else + subfic r10, r10, 64 +#endif + addi r0, r6, -4 + mtctr r0 + /* remaining byte by byte part of first dw */ +L(loop): +#ifdef __LITTLE_ENDIAN__ + addi r10, r10, 8 +#else + addi r10, r10, -8 +#endif + srd r0, r8, r10 + stbu r0, 1(r3) + addi r5, r5, -1 + cmpdi r0, r5, 0 + beq L(g1) + bdnz L(loop) +L(bytebybyte1): + addi r3, r3, 1 + /* remaining byte by byte part of second dw */ +L(bytebybyte): + addi r3, r3, -8 + addi r4, r4, -1 + +#ifdef __LITTLE_ENDIAN__ + extrdi. r0, r7, 8, 56 + stbu r7, 8(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 48 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 40 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 32 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 24 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 16 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 8 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi r0, r7, 8, 0 + stbu r0, 1(r3) + addi r5, r5, -1 + b L(g2) +#else + extrdi. r0, r7, 8, 0 + stbu r0, 8(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 8 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 16 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 24 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 32 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 40 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 48 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + stbu r7, 1(r3) + addi r5, r5, -1 + b L(g2) +#endif +L(g1): +#ifdef USE_AS_STPNCPY + addi r3, r3, 1 +#endif +L(g2): + addi r3, r3, 1 + mr r19, r3 + mr r8, r5 + b L(zeroFill) +L(null1): + mr r9, r3 + subf r4, r6, r4 + b L(byte_by_byte) +END(FUNC_NAME) +#ifndef USE_AS_STPNCPY +libc_hidden_builtin_def (strncpy) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strnlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strnlen.S new file mode 100644 index 0000000000..a970b6ce30 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strnlen.S @@ -0,0 +1,182 @@ +/* Optimized strnlen implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRNLEN +# define STRNLEN __strnlen +#endif + +/* int [r3] strnlen (char *s [r3], int size [r4]) */ + .machine power7 +ENTRY (STRNLEN) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 + add r7,r3,r4 /* Calculate the last acceptable address. */ + cmpldi r4,32 + li r0,0 /* Doubleword with null chars. */ + addi r7,r7,-1 + + /* If we have less than 33 bytes to search, skip to a faster code. */ + ble L(small_range) + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + sld r10,r10,r6 +#else + sld r10,r10,r6 + srd r10,r10,r6 +#endif + cmpldi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + clrrdi r7,r7,3 /* Address of last doubleword. */ + mtcrf 0x01,r8 + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_setup) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r0 + cmpldi cr7,r10,0 + bne cr7,L(done) + +L(loop_setup): + /* The last dword we want to read in the loop below is the one + containing the last byte of the string, ie. the dword at + (s + size - 1) & ~7, or r7. The first dword read is at + r8 + 8, we read 2 * cnt dwords, so the last dword read will + be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives + cnt = (r7 - r8) / 16 */ + sub r5,r7,r8 + srdi r6,r5,4 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. */ + + /* Main loop to look for the null byte in the string. Since + it's a small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + + ld r12,8(r8) + ldu r11,16(r8) + cmpb r10,r12,r0 + cmpb r9,r11,r0 + or r5,r9,r10 /* Merge everything in one doubleword. */ + cmpldi cr7,r5,0 + bne cr7,L(found) + bdnz L(loop) + + /* We may have one more dword to read. */ + cmpld cr6,r8,r7 + beq cr6,L(end_max) + + ldu r12,8(r8) + cmpb r10,r12,r0 + cmpldi cr6,r10,0 + bne cr6,L(done) + +L(end_max): + mr r3,r4 + blr + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + .align 4 +L(found): + cmpldi cr6,r10,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r9 + addi r8,r8,8 + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the length. + We need to make sure the null char is *before* the end of the + range. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r10,-1 + andc r0,r0,r10 + popcntd r0,r0 +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + sub r3,r8,r3 + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r3,r0 /* Length until the match. */ + cmpld r3,r4 + blelr + mr r3,r4 + blr + +/* Deals with size <= 32. */ + .align 4 +L(small_range): + cmpldi r4,0 + beq L(end_max) + + clrrdi r7,r7,3 /* Address of last doubleword. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + sld r10,r10,r6 +#else + sld r10,r10,r6 + srd r10,r10,r6 +#endif + cmpldi cr7,r10,0 + bne cr7,L(done) + + cmpld r8,r7 + beq L(end_max) + + .p2align 5 +L(loop_small): + ldu r12,8(r8) + cmpb r10,r12,r0 + cmpldi cr6,r10,0 + bne cr6,L(done) + cmpld r8,r7 + bne L(loop_small) + mr r3,r4 + blr + +END (STRNLEN) +libc_hidden_def (__strnlen) +weak_alias (__strnlen, strnlen) +libc_hidden_def (strnlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strrchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strrchr.S new file mode 100644 index 0000000000..c22393deb5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strrchr.S @@ -0,0 +1,260 @@ +/* Optimized strrchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strrchr (char *s [r3], int c [r4]) */ + +#ifndef STRRCHR +# define STRRCHR strrchr +#endif + + .machine power7 +ENTRY (STRRCHR) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r9,0 /* used to store last occurence */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* r4 is changed now ,if its passed as more chars + check for null again */ + cmpdi cr7,r4,0 + beq cr7,L(null_match) + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + +L(align): + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r7,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r7,r4 + cmpb r7,r7,r0 + or r12,r10,r11 + or r5,r6,r7 + or r5,r12,r5 + cmpdi cr7,r5,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + cmpdi cr6,r12,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 + + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ + +L(done): + /* if there are more than one 0xff in r11, find the first pos of ff + in r11 and fill r10 with 0 from that position */ + cmpdi cr7,r11,0 + beq cr7,L(no_null) +#ifdef __LITTLE_ENDIAN__ + addi r3,r11,-1 + andc r3,r3,r11 + popcntd r0,r3 +#else + cntlzd r0,r11 +#endif + subfic r0,r0,63 + li r6,-1 +#ifdef __LITTLE_ENDIAN__ + srd r0,r6,r0 +#else + sld r0,r6,r0 +#endif + and r10,r0,r10 +L(no_null): +#ifdef __LITTLE_ENDIAN__ + cntlzd r0,r10 /* Count leading zeros before c matches. */ + addi r3,r10,-1 + andc r3,r3,r10 + addi r10,r11,-1 + andc r10,r10,r11 + cmpld cr7,r3,r10 + bgt cr7,L(no_match) +#else + addi r3,r10,-1 /* Count trailing zeros before c matches. */ + andc r3,r3,r10 + popcntd r0,r3 + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + subfic r0,r0,7 + add r9,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + li r0,0 + cmpdi cr7,r11,0 /* If r11 == 0, no null's have been found. */ + beq cr7,L(align) + + .align 4 +L(no_match): + mr r3,r9 + blr + +/* We are here because strrchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(loop_null) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +END (STRRCHR) +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strrchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c new file mode 100644 index 0000000000..a917b2157e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c @@ -0,0 +1,27 @@ +/* Optimized strstr implementation for PowerPC64/POWER7. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRSTR __strstr_ppc +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(__name) + +extern __typeof (strstr) __strstr_ppc attribute_hidden; + +#include <string/strstr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr.S new file mode 100644 index 0000000000..260db2ed6d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr.S @@ -0,0 +1,521 @@ +/* Optimized strstr implementation for PowerPC64/POWER7. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Char * [r3] strstr (char *s [r3], char * pat[r4]) */ + +/* The performance gain is obtained using aligned memory access, load + * doubleword and usage of cmpb instruction for quicker comparison. */ + +#define ITERATIONS 64 + +#ifndef STRSTR +# define STRSTR strstr +#endif + +#ifndef STRLEN +/* For builds with no IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRLEN __GI_strlen +# else +# define STRLEN strlen +# endif +#endif + +#ifndef STRNLEN +/* For builds with no IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRNLEN __GI_strnlen +# else +# define STRNLEN __strnlen +# endif +#endif + +#ifndef STRCHR +# ifdef SHARED +# define STRCHR __GI_strchr +# else +# define STRCHR strchr +# endif +#endif + +#define FRAMESIZE (FRAME_MIN_SIZE+32) + .machine power7 +EALIGN (STRSTR, 4, 0) + CALL_MCOUNT 2 + mflr r0 /* Load link register LR to r0. */ + std r31, -8(r1) /* Save callers register r31. */ + std r30, -16(r1) /* Save callers register r30. */ + std r29, -24(r1) /* Save callers register r29. */ + std r28, -32(r1) /* Save callers register r28. */ + std r0, 16(r1) /* Store the link register. */ + cfi_offset(r31, -8) + cfi_offset(r30, -16) + cfi_offset(r28, -32) + cfi_offset(r29, -24) + cfi_offset(lr, 16) + stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */ + cfi_adjust_cfa_offset(FRAMESIZE) + + dcbt 0, r3 + dcbt 0, r4 + cmpdi cr7, r3, 0 + beq cr7, L(retnull) + cmpdi cr7, r4, 0 + beq cr7, L(retnull) + + mr r29, r3 + mr r30, r4 + mr r3, r4 + bl STRLEN + nop + + cmpdi cr7, r3, 0 /* If search str is null. */ + beq cr7, L(ret_r3) + + mr r31, r3 + mr r4, r3 + mr r3, r29 + bl STRNLEN + nop + + cmpd cr7, r3, r31 /* If len(r3) < len(r4). */ + blt cr7, L(retnull) + mr r3, r29 + lbz r4, 0(r30) + bl STRCHR + nop + + mr r11, r3 + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) + /* Reg r28 is used to count the number of iterations. */ + li r28, 0 + rldicl r8, r3, 0, 52 /* Page cross check. */ + cmpldi cr7, r8, 4096-16 + bgt cr7, L(bytebybyte) + + rldicl r8, r30, 0, 52 + cmpldi cr7, r8, 4096-16 + bgt cr7, L(bytebybyte) + + /* If len(r4) < 8 handle in a different way. */ + /* Shift position based on null and use cmpb. */ + cmpdi cr7, r31, 8 + blt cr7, L(lessthan8) + + /* Len(r4) >= 8 reaches here. */ + mr r8, r3 /* Save r3 for future use. */ + mr r4, r30 /* Restore r4. */ + li r0, 0 + rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */ + clrrdi r4, r4, 3 /* Make r4 aligned to 8. */ + ld r6, 0(r4) + addi r4, r4, 8 + cmpdi cr7, r10, 0 /* Check if its already aligned? */ + beq cr7, L(begin1) +#ifdef __LITTLE_ENDIAN__ + srd r6, r6, r10 /* Discard unwanted bits. */ +#else + sld r6, r6, r10 +#endif + ld r9, 0(r4) + subfic r10, r10, 64 +#ifdef __LITTLE_ENDIAN__ + sld r9, r9, r10 /* Discard unwanted bits. */ +#else + srd r9, r9, r10 +#endif + or r6, r6, r9 /* Form complete search str. */ +L(begin1): + mr r29, r6 + rlwinm r10, r3, 3, 26, 28 + clrrdi r3, r3, 3 + ld r5, 0(r3) + cmpb r9, r0, r6 /* Check if input has null. */ + cmpdi cr7, r9, 0 + bne cr7, L(return3) + cmpb r9, r0, r5 /* Check if input has null. */ +#ifdef __LITTLE_ENDIAN__ + srd r9, r9, r10 +#else + sld r9, r9, r10 +#endif + cmpdi cr7, r9, 0 + bne cr7, L(retnull) + + li r12, -8 /* Shift values. */ + li r11, 72 /* Shift values. */ + cmpdi cr7, r10, 0 + beq cr7, L(nextbyte1) + mr r12, r10 + addi r12, r12, -8 + subfic r11, r12, 64 + +L(nextbyte1): + ldu r7, 8(r3) /* Load next dw. */ + addi r12, r12, 8 /* Shift one byte and compare. */ + addi r11, r11, -8 +#ifdef __LITTLE_ENDIAN__ + srd r9, r5, r12 /* Rotate based on mask. */ + sld r10, r7, r11 +#else + sld r9, r5, r12 + srd r10, r7, r11 +#endif + /* Form single dw from few bytes on first load and second load. */ + or r10, r9, r10 + /* Check for null in the formed dw. */ + cmpb r9, r0, r10 + cmpdi cr7, r9, 0 + bne cr7, L(retnull) + /* Cmpb search str and input str. */ + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + beq cr7, L(match) + addi r8, r8, 1 + b L(begin) + + .align 4 +L(match): + /* There is a match of 8 bytes, check next bytes. */ + cmpdi cr7, r31, 8 + beq cr7, L(return) + /* Update next starting point r8. */ + srdi r9, r11, 3 + subf r9, r9, r3 + mr r8, r9 + +L(secondmatch): + mr r5, r7 + rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */ + ld r6, 0(r4) + addi r4, r4, 8 + cmpdi cr7, r10, 0 /* Check if its already aligned? */ + beq cr7, L(proceed3) +#ifdef __LITTLE_ENDIAN__ + srd r6, r6, r10 /* Discard unwanted bits. */ + cmpb r9, r0, r6 + sld r9, r9, r10 +#else + sld r6, r6, r10 + cmpb r9, r0, r6 + srd r9, r9, r10 +#endif + cmpdi cr7, r9, 0 + bne cr7, L(proceed3) + ld r9, 0(r4) + subfic r10, r10, 64 +#ifdef __LITTLE_ENDIAN__ + sld r9, r9, r10 /* Discard unwanted bits. */ +#else + srd r9, r9, r10 +#endif + or r6, r6, r9 /* Form complete search str. */ + +L(proceed3): + li r7, 0 + addi r3, r3, 8 + cmpb r9, r0, r5 + cmpdi cr7, r9, 0 + bne cr7, L(proceed4) + ld r7, 0(r3) +L(proceed4): +#ifdef __LITTLE_ENDIAN__ + srd r9, r5, r12 + sld r10, r7, r11 +#else + sld r9, r5, r12 + srd r10, r7, r11 +#endif + /* Form single dw with few bytes from first and second load. */ + or r10, r9, r10 + cmpb r9, r0, r6 + cmpdi cr7, r9, 0 + bne cr7, L(return4) + /* Check for null in the formed dw. */ + cmpb r9, r0, r10 + cmpdi cr7, r9, 0 + bne cr7, L(retnull) + /* If the next 8 bytes dont match, start search again. */ + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + bne cr7, L(reset) + /* If the next 8 bytes match, load and compare next 8. */ + b L(secondmatch) + + .align 4 +L(reset): + /* Start the search again. */ + addi r8, r8, 1 + b L(begin) + + .align 4 +L(return3): + /* Count leading zeros and compare partial dw. */ +#ifdef __LITTLE_ENDIAN__ + addi r7, r9, -1 + andc r7, r7, r9 + popcntd r7, r7 + subfic r7, r7, 64 + sld r10, r5, r7 + sld r6, r6, r7 +#else + cntlzd r7, r9 + subfic r7, r7, 64 + srd r10, r5, r7 + srd r6, r6, r7 +#endif + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + addi r8, r8, 1 + /* Start search again if there is no match. */ + bne cr7, L(begin) + /* If the words match, update return values. */ + subfic r7, r7, 64 + srdi r7, r7, 3 + add r3, r3, r7 + subf r3, r31, r3 + b L(end) + + .align 4 +L(return4): + /* Count leading zeros and compare partial dw. */ +#ifdef __LITTLE_ENDIAN__ + addi r7, r9, -1 + andc r7, r7, r9 + popcntd r7, r7 + subfic r7, r7, 64 + sld r10, r10, r7 + sld r6, r6, r7 +#else + cntlzd r7, r9 + subfic r7, r7, 64 + srd r10, r10, r7 + srd r6, r6, r7 +#endif + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + addi r8, r8, 1 + bne cr7, L(begin) + subfic r7, r7, 64 + srdi r11, r11, 3 + subf r3, r11, r3 + srdi r7, r7, 3 + add r3, r3, r7 + subf r3, r31, r3 + b L(end) + + .align 4 +L(begin): + mr r3, r8 + /* When our iterations exceed ITERATIONS,fall back to default. */ + addi r28, r28, 1 + cmpdi cr7, r28, ITERATIONS + beq cr7, L(default) + lbz r4, 0(r30) + bl STRCHR + nop + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) + mr r8, r3 + mr r4, r30 /* Restore r4. */ + li r0, 0 + mr r6, r29 + clrrdi r4, r4, 3 + addi r4, r4, 8 + b L(begin1) + + /* Handle less than 8 search string. */ + .align 4 +L(lessthan8): + mr r4, r3 + mr r9, r30 + li r0, 0 + + rlwinm r10, r9, 3, 26, 28 /* Calculate padding in bits. */ + srdi r8, r10, 3 /* Padding in bytes. */ + clrrdi r9, r9, 3 /* Make r4 aligned to 8. */ + ld r6, 0(r9) + cmpdi cr7, r10, 0 /* Check if its already aligned? */ + beq cr7, L(proceed2) +#ifdef __LITTLE_ENDIAN__ + srd r6, r6, r10 /* Discard unwanted bits. */ +#else + sld r6, r6, r10 +#endif + subfic r8, r8, 8 + cmpd cr7, r8, r31 /* Next load needed? */ + bge cr7, L(proceed2) + ld r7, 8(r9) + subfic r10, r10, 64 +#ifdef __LITTLE_ENDIAN__ + sld r7, r7, r10 /* Discard unwanted bits. */ +#else + srd r7, r7, r10 +#endif + or r6, r6, r7 /* Form complete search str. */ +L(proceed2): + mr r29, r6 + rlwinm r10, r3, 3, 26, 28 + clrrdi r7, r3, 3 /* Make r3 aligned. */ + ld r5, 0(r7) + sldi r8, r31, 3 + subfic r8, r8, 64 +#ifdef __LITTLE_ENDIAN__ + sld r6, r6, r8 + cmpb r9, r0, r5 + srd r9, r9, r10 +#else + srd r6, r6, r8 + cmpb r9, r0, r5 + sld r9, r9, r10 +#endif + cmpdi cr7, r9, 0 + bne cr7, L(noload) + cmpdi cr7, r10, 0 + beq cr7, L(continue) + ld r7, 8(r7) +L(continue1): + mr r12, r10 + addi r12, r12, -8 + subfic r11, r12, 64 + b L(nextbyte) + + .align 4 +L(continue): + ld r7, 8(r7) + li r12, -8 /* Shift values. */ + li r11, 72 /* Shift values. */ +L(nextbyte): + addi r12, r12, 8 /* Mask for rotation. */ + addi r11, r11, -8 +#ifdef __LITTLE_ENDIAN__ + srd r9, r5, r12 + sld r10, r7, r11 + or r10, r9, r10 + sld r10, r10, r8 + cmpb r9, r0, r10 + srd r9, r9, r8 +#else + sld r9, r5, r12 + srd r10, r7, r11 + or r10, r9, r10 + srd r10, r10, r8 + cmpb r9, r0, r10 + sld r9, r9, r8 +#endif + cmpdi cr7, r9, 0 + bne cr7, L(retnull) + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + beq cr7, L(end) + addi r3, r4, 1 + /* When our iterations exceed ITERATIONS,fall back to default. */ + addi r28, r28, 1 + cmpdi cr7, r28, ITERATIONS + beq cr7, L(default) + lbz r4, 0(r30) + bl STRCHR + nop + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) + mr r4, r3 + mr r6, r29 + li r0, 0 + b L(proceed2) + + .align 4 +L(noload): + /* Reached null in r3, so skip next load. */ + li r7, 0 + b L(continue1) + + .align 4 +L(return): + /* Update return values. */ + srdi r9, r11, 3 + subf r3, r9, r3 + b L(end) + + /* Handling byte by byte. */ + .align 4 +L(bytebybyte): + mr r8, r3 + addi r8, r8, -1 +L(loop1): + addi r8, r8, 1 + mr r3, r8 + mr r4, r30 + lbz r6, 0(r4) + cmpdi cr7, r6, 0 + beq cr7, L(updater3) +L(loop): + lbz r5, 0(r3) + cmpdi cr7, r5, 0 + beq cr7, L(retnull) + cmpld cr7, r6, r5 + bne cr7, L(loop1) + addi r3, r3, 1 + addi r4, r4, 1 + lbz r6, 0(r4) + cmpdi cr7, r6, 0 + beq cr7, L(updater3) + b L(loop) + + /* Handling return values. */ + .align 4 +L(updater3): + subf r3, r31, r3 /* Reduce len of r4 from r3. */ + b L(end) + + .align 4 +L(ret_r3): + mr r3, r29 /* Return r3. */ + b L(end) + + .align 4 +L(retnull): + li r3, 0 /* Return NULL. */ + b L(end) + + .align 4 +L(default): + mr r4, r30 + bl __strstr_ppc + nop + + .align 4 +L(end): + addi r1, r1, FRAMESIZE /* Restore stack pointer. */ + cfi_adjust_cfa_offset(-FRAMESIZE) + ld r0, 16(r1) /* Restore the saved link register. */ + ld r28, -32(r1) /* Restore callers save register r28. */ + ld r29, -24(r1) /* Restore callers save register r29. */ + ld r30, -16(r1) /* Restore callers save register r30. */ + ld r31, -8(r1) /* Restore callers save register r31. */ + mtlr r0 /* Branch to link register. */ + blr +END (STRSTR) +libc_hidden_builtin_def (strstr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/sub_n.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/sub_n.S new file mode 100644 index 0000000000..848dad5718 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/sub_n.S @@ -0,0 +1,23 @@ +/* PowerPC64 mpn_lshift -- mpn_add_n/mpn_sub_n -- mpn addition and + subtraction. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define USE_AS_SUB +#include "add_n.S" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Implies new file mode 100644 index 0000000000..9a5e3c7277 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power7/fpu +powerpc/powerpc64/power7 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Makefile new file mode 100644 index 0000000000..71a59529f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Makefile @@ -0,0 +1,3 @@ +ifeq ($(subdir),string) +sysdep_routines += strcasestr-ppc64 +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/Implies new file mode 100644 index 0000000000..1187cdfb0a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/fpu/ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S new file mode 100644 index 0000000000..4c42926a74 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S @@ -0,0 +1,303 @@ +/* Optimized expf(). PowerPC64/POWER8 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Short algorithm description: + * + * Let K = 64 (table size). + * e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y)) + * where: + * x = m*log(2)/K + y, y in [0.0..log(2)/K] + * m = n*K + j, m,n,j - signed integer, j in [0..K-1] + * values of 2^(j/K) are tabulated as T[j]. + * + * P(y) is a minimax polynomial approximation of expf(y)-1 + * on small interval [0.0..log(2)/K]. + * + * P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as + * z = y*y; P(y) = (P3*z + P1)*z + (P2*z + P0)*y + * + * Special cases: + * expf(NaN) = NaN + * expf(+INF) = +INF + * expf(-INF) = 0 + * expf(x) = 1 for subnormals + * for finite argument, only expf(0)=1 is exact + * expf(x) overflows if x>88.7228317260742190 + * expf(x) underflows if x<-103.972076416015620 + */ + +#define C1 0x42ad496b /* Single precision 125*log(2). */ +#define C2 0x31800000 /* Single precision 2^(-28). */ +#define SP_INF 0x7f800000 /* Single precision Inf. */ +#define SP_EXP_BIAS 0x1fc0 /* Single precision exponent bias. */ + +#define DATA_OFFSET r9 + +/* Implements the function + + float [fp1] expf (float [fp1] x) */ + + .machine power8 +EALIGN(__ieee754_expf, 4, 0) + addis DATA_OFFSET,r2,.Lanchor@toc@ha + addi DATA_OFFSET,DATA_OFFSET,.Lanchor@toc@l + + xscvdpspn v0,v1 + mfvsrd r8,v0 /* r8 = x */ + lfd fp2,(.KLN2-.Lanchor)(DATA_OFFSET) + lfd fp3,(.P2-.Lanchor)(DATA_OFFSET) + rldicl r3,r8,32,33 /* r3 = |x| */ + lis r4,C1@ha /* r4 = 125*log(2) */ + ori r4,r4,C1@l + cmpw r3,r4 + lfd fp5,(.P3-.Lanchor)(DATA_OFFSET) + lfd fp4,(.RS-.Lanchor)(DATA_OFFSET) + fmadd fp2,fp1,fp2,fp4 /* fp2 = x * K/log(2) + (2^23 + 2^22) */ + bge L(special_paths) /* |x| >= 125*log(2) ? */ + + lis r4,C2@ha + ori r4,r4,C2@l + cmpw r3,r4 + blt L(small_args) /* |x| < 2^(-28) ? */ + + /* Main path: here if 2^(-28) <= |x| < 125*log(2) */ + frsp fp6,fp2 + xscvdpsp v2,v2 + mfvsrd r8,v2 + mr r3,r8 /* r3 = m */ + rldicl r8,r8,32,58 /* r8 = j */ + lfs fp4,(.SP_RS-.Lanchor)(DATA_OFFSET) + fsubs fp2,fp6,fp4 /* fp2 = m = x * K/log(2) */ + srdi r3,r3,32 + clrrwi r3,r3,6 /* r3 = n */ + lfd fp6,(.NLN2K-.Lanchor)(DATA_OFFSET) + fmadd fp0,fp2,fp6,fp1 /* fp0 = y = x - m*log(2)/K */ + fmul fp2,fp0,fp0 /* fp2 = z = y^2 */ + lfd fp4,(.P1-.Lanchor)(DATA_OFFSET) + lfd fp6,(.P0-.Lanchor)(DATA_OFFSET) + lis r4,SP_EXP_BIAS@ha + ori r4,r4,SP_EXP_BIAS@l + add r3,r3,r4 + rldic r3,r3,49,1 /* r3 = 2^n */ + fmadd fp4,fp5,fp2,fp4 /* fp4 = P3 * z + P1 */ + fmadd fp6,fp3,fp2,fp6 /* fp6 = P2 * z + P0 */ + mtvsrd v1,r3 + xscvspdp v1,v1 + fmul fp4,fp4,fp2 /* fp4 = (P3 * z + P1)*z */ + fmadd fp0,fp0,fp6,fp4 /* fp0 = P(y) */ + sldi r8,r8,3 /* Access doublewords from T[j]. */ + addi r6,DATA_OFFSET,(.Ttable-.Lanchor) + lfdx fp3,r6,r8 + fmadd fp0,fp0,fp3,fp3 /* fp0 = T[j] * (1 + P(y)) */ + fmul fp1,fp1,fp0 /* fp1 = 2^n * T[j] * (1 + P(y)) */ + frsp fp1,fp1 + blr + + .align 4 +/* x is either underflow, overflow, infinite or NaN. */ +L(special_paths): + srdi r8,r8,32 + rlwinm r8,r8,3,29,29 /* r8 = 0, if x positive. + r8 = 4, otherwise. */ + addi r6,DATA_OFFSET,(.SPRANGE-.Lanchor) + lwzx r4,r6,r8 /* r4 = .SPRANGE[signbit(x)] */ + cmpw r3,r4 + /* |x| <= .SPRANGE[signbit(x)] */ + ble L(near_under_or_overflow) + + lis r4,SP_INF@ha + ori r4,r4,SP_INF@l + cmpw r3,r4 + bge L(arg_inf_or_nan) /* |x| > Infinite ? */ + + addi r6,DATA_OFFSET,(.SPLARGE_SMALL-.Lanchor) + lfsx fp1,r6,r8 + fmuls fp1,fp1,fp1 + blr + + + .align 4 +L(small_args): + /* expf(x) = 1.0, where |x| < |2^(-28)| */ + lfs fp2,(.SPone-.Lanchor)(DATA_OFFSET) + fadds fp1,fp1,fp2 + blr + + + .align 4 +L(arg_inf_or_nan:) + bne L(arg_nan) + + /* expf(+INF) = +INF + expf(-INF) = 0 */ + addi r6,DATA_OFFSET,(.INF_ZERO-.Lanchor) + lfsx fp1,r6,r8 + blr + + + .align 4 +L(arg_nan): + /* expf(NaN) = NaN */ + fadd fp1,fp1,fp1 + frsp fp1,fp1 + blr + + .align 4 +L(near_under_or_overflow): + frsp fp6,fp2 + xscvdpsp v2,v2 + mfvsrd r8,v2 + mr r3,r8 /* r3 = m */ + rldicl r8,r8,32,58 /* r8 = j */ + lfs fp4,(.SP_RS-.Lanchor)(DATA_OFFSET) + fsubs fp2,fp6,fp4 /* fp2 = m = x * K/log(2) */ + srdi r3,r3,32 + clrrwi r3,r3,6 /* r3 = n */ + lfd fp6,(.NLN2K-.Lanchor)(DATA_OFFSET) + fmadd fp0,fp2,fp6,fp1 /* fp0 = y = x - m*log(2)/K */ + fmul fp2,fp0,fp0 /* fp2 = z = y^2 */ + lfd fp4,(.P1-.Lanchor)(DATA_OFFSET) + lfd fp6,(.P0-.Lanchor)(DATA_OFFSET) + ld r4,(.DP_EXP_BIAS-.Lanchor)(DATA_OFFSET) + add r3,r3,r4 + rldic r3,r3,46,1 /* r3 = 2 */ + fmadd fp4,fp5,fp2,fp4 /* fp4 = P3 * z + P1 */ + fmadd fp6,fp3,fp2,fp6 /* fp6 = P2 * z + P0 */ + mtvsrd v1,r3 + fmul fp4,fp4,fp2 /* fp4 = (P3*z + P1)*z */ + fmadd fp0,fp0,fp6,fp4 /* fp0 = P(y) */ + sldi r8,r8,3 /* Access doublewords from T[j]. */ + addi r6,DATA_OFFSET,(.Ttable-.Lanchor) + lfdx fp3,r6,r8 + fmadd fp0,fp0,fp3,fp3 /* fp0 = T[j] * (1 + T[j]) */ + fmul fp1,fp1,fp0 /* fp1 = 2^n * T[j] * (1 + T[j]) */ + frsp fp1,fp1 + blr +END(__ieee754_expf) + + .section .rodata, "a",@progbits +.Lanchor: + .balign 8 +/* Table T[j] = 2^(j/K). Double precision. */ +.Ttable: + .8byte 0x3ff0000000000000 + .8byte 0x3ff02c9a3e778061 + .8byte 0x3ff059b0d3158574 + .8byte 0x3ff0874518759bc8 + .8byte 0x3ff0b5586cf9890f + .8byte 0x3ff0e3ec32d3d1a2 + .8byte 0x3ff11301d0125b51 + .8byte 0x3ff1429aaea92de0 + .8byte 0x3ff172b83c7d517b + .8byte 0x3ff1a35beb6fcb75 + .8byte 0x3ff1d4873168b9aa + .8byte 0x3ff2063b88628cd6 + .8byte 0x3ff2387a6e756238 + .8byte 0x3ff26b4565e27cdd + .8byte 0x3ff29e9df51fdee1 + .8byte 0x3ff2d285a6e4030b + .8byte 0x3ff306fe0a31b715 + .8byte 0x3ff33c08b26416ff + .8byte 0x3ff371a7373aa9cb + .8byte 0x3ff3a7db34e59ff7 + .8byte 0x3ff3dea64c123422 + .8byte 0x3ff4160a21f72e2a + .8byte 0x3ff44e086061892d + .8byte 0x3ff486a2b5c13cd0 + .8byte 0x3ff4bfdad5362a27 + .8byte 0x3ff4f9b2769d2ca7 + .8byte 0x3ff5342b569d4f82 + .8byte 0x3ff56f4736b527da + .8byte 0x3ff5ab07dd485429 + .8byte 0x3ff5e76f15ad2148 + .8byte 0x3ff6247eb03a5585 + .8byte 0x3ff6623882552225 + .8byte 0x3ff6a09e667f3bcd + .8byte 0x3ff6dfb23c651a2f + .8byte 0x3ff71f75e8ec5f74 + .8byte 0x3ff75feb564267c9 + .8byte 0x3ff7a11473eb0187 + .8byte 0x3ff7e2f336cf4e62 + .8byte 0x3ff82589994cce13 + .8byte 0x3ff868d99b4492ed + .8byte 0x3ff8ace5422aa0db + .8byte 0x3ff8f1ae99157736 + .8byte 0x3ff93737b0cdc5e5 + .8byte 0x3ff97d829fde4e50 + .8byte 0x3ff9c49182a3f090 + .8byte 0x3ffa0c667b5de565 + .8byte 0x3ffa5503b23e255d + .8byte 0x3ffa9e6b5579fdbf + .8byte 0x3ffae89f995ad3ad + .8byte 0x3ffb33a2b84f15fb + .8byte 0x3ffb7f76f2fb5e47 + .8byte 0x3ffbcc1e904bc1d2 + .8byte 0x3ffc199bdd85529c + .8byte 0x3ffc67f12e57d14b + .8byte 0x3ffcb720dcef9069 + .8byte 0x3ffd072d4a07897c + .8byte 0x3ffd5818dcfba487 + .8byte 0x3ffda9e603db3285 + .8byte 0x3ffdfc97337b9b5f + .8byte 0x3ffe502ee78b3ff6 + .8byte 0x3ffea4afa2a490da + .8byte 0x3ffefa1bee615a27 + .8byte 0x3fff50765b6e4540 + .8byte 0x3fffa7c1819e90d8 + +.KLN2: + .8byte 0x40571547652b82fe /* Double precision K/log(2). */ + +/* Double precision polynomial coefficients. */ +.P0: + .8byte 0x3fefffffffffe7c6 +.P1: + .8byte 0x3fe00000008d6118 +.P2: + .8byte 0x3fc55550da752d4f +.P3: + .8byte 0x3fa56420eb78fa85 + +.RS: + .8byte 0x4168000000000000 /* Double precision 2^23 + 2^22. */ +.NLN2K: + .8byte 0xbf862e42fefa39ef /* Double precision -log(2)/K. */ +.DP_EXP_BIAS: + .8byte 0x000000000000ffc0 /* Double precision exponent bias. */ + + .balign 4 +.SPone: + .4byte 0x3f800000 /* Single precision 1.0. */ +.SP_RS: + .4byte 0x4b400000 /* Single precision 2^23 + 2^22. */ + +.SPRANGE: /* Single precision overflow/underflow bounds. */ + .4byte 0x42b17217 /* if x>this bound, then result overflows. */ + .4byte 0x42cff1b4 /* if x<this bound, then result underflows. */ + +.SPLARGE_SMALL: + .4byte 0x71800000 /* 2^100. */ + .4byte 0x0d800000 /* 2^-100. */ + +.INF_ZERO: + .4byte 0x7f800000 /* Single precision Inf. */ + .4byte 0 /* Single precision zero. */ + +strong_alias (__ieee754_expf, __expf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies new file mode 100644 index 0000000000..7fd86fdf87 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S new file mode 100644 index 0000000000..8dfa0076e0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S @@ -0,0 +1,508 @@ +/* Optimized cosf(). PowerPC64/POWER8 version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#define _ERRNO_H 1 +#include <bits/errno.h> + +#define FRAMESIZE (FRAME_MIN_SIZE+16) + +#define FLOAT_EXPONENT_SHIFT 23 +#define FLOAT_EXPONENT_BIAS 127 +#define INTEGER_BITS 3 + +#define PI_4 0x3f490fdb /* PI/4 */ +#define NINEPI_4 0x40e231d6 /* 9 * PI/4 */ +#define TWO_PN5 0x3d000000 /* 2^-5 */ +#define TWO_PN27 0x32000000 /* 2^-27 */ +#define INFINITY 0x7f800000 +#define TWO_P23 0x4b000000 /* 2^23 */ +#define FX_FRACTION_1_28 0x9249250 /* 0x100000000 / 28 + 1 */ + + /* Implements the function + + float [fp1] cosf (float [fp1] x) */ + + .machine power8 +EALIGN(__cosf, 4, 0) + addis r9,r2,L(anchor)@toc@ha + addi r9,r9,L(anchor)@toc@l + + lis r4,PI_4@h + ori r4,r4,PI_4@l + + xscvdpspn v0,v1 + mfvsrd r8,v0 + rldicl r3,r8,32,33 /* Remove sign bit. */ + + cmpw r3,r4 + bge L(greater_or_equal_pio4) + + lis r4,TWO_PN5@h + ori r4,r4,TWO_PN5@l + + cmpw r3,r4 + blt L(less_2pn5) + + /* Chebyshev polynomial of the form: + * 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). */ + + lfd fp9,(L(C0)-L(anchor))(r9) + lfd fp10,(L(C1)-L(anchor))(r9) + lfd fp11,(L(C2)-L(anchor))(r9) + lfd fp12,(L(C3)-L(anchor))(r9) + lfd fp13,(L(C4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + lfd fp3,(L(DPone)-L(anchor))(r9) + + fmadd fp4,fp2,fp13,fp12 /* C3+x^2*C4 */ + fmadd fp4,fp2,fp4,fp11 /* C2+x^2*(C3+x^2*C4) */ + fmadd fp4,fp2,fp4,fp10 /* C1+x^2*(C2+x^2*(C3+x^2*C4)) */ + fmadd fp4,fp2,fp4,fp9 /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */ + fmadd fp1,fp2,fp4,fp3 /* 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */ + frsp fp1,fp1 /* Round to single precision. */ + + blr + + .balign 16 +L(greater_or_equal_pio4): + lis r4,NINEPI_4@h + ori r4,r4,NINEPI_4@l + cmpw r3,r4 + bge L(greater_or_equal_9pio4) + + /* Calculate quotient of |x|/(PI/4). */ + lfd fp2,(L(invpio4)-L(anchor))(r9) + fabs fp1,fp1 /* |x| */ + fmul fp2,fp1,fp2 /* |x|/(PI/4) */ + fctiduz fp2,fp2 + mfvsrd r3,v2 /* n = |x| mod PI/4 */ + + /* Now use that quotient to find |x| mod (PI/2). */ + addi r7,r3,1 + rldicr r5,r7,2,60 /* ((n+1) >> 1) << 3 */ + addi r6,r9,(L(pio2_table)-L(anchor)) + lfdx fp4,r5,r6 + fsub fp1,fp1,fp4 + + .balign 16 +L(reduced): + /* Now we are in the range -PI/4 to PI/4. */ + + /* Work out if we are in a positive or negative primary interval. */ + addi r7,r7,2 + rldicl r4,r7,62,63 /* ((n+3) >> 2) & 1 */ + + /* Load a 1.0 or -1.0. */ + addi r5,r9,(L(ones)-L(anchor)) + sldi r4,r4,3 + lfdx fp0,r4,r5 + + /* Are we in the primary interval of sin or cos? */ + andi. r4,r7,0x2 + bne L(cos) + + /* Chebyshev polynomial of the form: + x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */ + + lfd fp9,(L(S0)-L(anchor))(r9) + lfd fp10,(L(S1)-L(anchor))(r9) + lfd fp11,(L(S2)-L(anchor))(r9) + lfd fp12,(L(S3)-L(anchor))(r9) + lfd fp13,(L(S4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + fmul fp3,fp2,fp1 /* x^3 */ + + fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */ + fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */ + fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */ + fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */ + fmadd fp4,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */ + fmul fp4,fp4,fp0 /* Add in the sign. */ + frsp fp1,fp4 /* Round to single precision. */ + + blr + + .balign 16 +L(cos): + /* Chebyshev polynomial of the form: + 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). */ + + lfd fp9,(L(C0)-L(anchor))(r9) + lfd fp10,(L(C1)-L(anchor))(r9) + lfd fp11,(L(C2)-L(anchor))(r9) + lfd fp12,(L(C3)-L(anchor))(r9) + lfd fp13,(L(C4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + lfd fp3,(L(DPone)-L(anchor))(r9) + + fmadd fp4,fp2,fp13,fp12 /* C3+x^2*C4 */ + fmadd fp4,fp2,fp4,fp11 /* C2+x^2*(C3+x^2*C4) */ + fmadd fp4,fp2,fp4,fp10 /* C1+x^2*(C2+x^2*(C3+x^2*C4)) */ + fmadd fp4,fp2,fp4,fp9 /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */ + fmadd fp4,fp2,fp4,fp3 /* 1.0 + x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */ + fmul fp4,fp4,fp0 /* Add in the sign. */ + frsp fp1,fp4 /* Round to single precision. */ + + blr + + .balign 16 +L(greater_or_equal_9pio4): + lis r4,INFINITY@h + ori r4,r4,INFINITY@l + cmpw r3,r4 + bge L(inf_or_nan) + + lis r4,TWO_P23@h + ori r4,r4,TWO_P23@l + cmpw r3,r4 + bge L(greater_or_equal_2p23) + + fabs fp1,fp1 /* |x| */ + + /* Calculate quotient of |x|/(PI/4). */ + lfd fp2,(L(invpio4)-L(anchor))(r9) + + lfd fp3,(L(DPone)-L(anchor))(r9) + lfd fp4,(L(DPhalf)-L(anchor))(r9) + fmul fp2,fp1,fp2 /* |x|/(PI/4) */ + friz fp2,fp2 /* n = floor(|x|/(PI/4)) */ + + /* Calculate (n + 1) / 2. */ + fadd fp2,fp2,fp3 /* n + 1 */ + fmul fp3,fp2,fp4 /* (n + 1) / 2 */ + friz fp3,fp3 + + lfd fp4,(L(pio2hi)-L(anchor))(r9) + lfd fp5,(L(pio2lo)-L(anchor))(r9) + + fmul fp6,fp4,fp3 + fadd fp6,fp6,fp1 + fmadd fp1,fp5,fp3,fp6 + + fctiduz fp2,fp2 + mfvsrd r7,v2 /* n + 1 */ + + b L(reduced) + + .balign 16 +L(inf_or_nan): + bne L(skip_errno_setting) /* Is a NAN? */ + + /* We delayed the creation of the stack frame, as well as the saving of + the link register, because only at this point, we are sure that + doing so is actually needed. */ + + stfd fp1,-8(r1) + + /* Save the link register. */ + mflr r0 + std r0,16(r1) + cfi_offset(lr, 16) + + /* Create the stack frame. */ + stdu r1,-FRAMESIZE(r1) + cfi_adjust_cfa_offset(FRAMESIZE) + + bl JUMPTARGET(__errno_location) + nop + + /* Restore the stack frame. */ + addi r1,r1,FRAMESIZE + cfi_adjust_cfa_offset(-FRAMESIZE) + /* Restore the link register. */ + ld r0,16(r1) + mtlr r0 + + lfd fp1,-8(r1) + + /* errno = EDOM */ + li r4,EDOM + stw r4,0(r3) + +L(skip_errno_setting): + fsub fp1,fp1,fp1 /* x - x */ + blr + + .balign 16 +L(greater_or_equal_2p23): + fabs fp1,fp1 + + srwi r4,r3,FLOAT_EXPONENT_SHIFT + subi r4,r4,FLOAT_EXPONENT_BIAS + + /* We reduce the input modulo pi/4, so we need 3 bits of integer + to determine where in 2*pi we are. Index into our array + accordingly. */ + addi r4,r4,INTEGER_BITS + + /* To avoid an expensive divide, for the range we care about (0 - 127) + we can transform x/28 into: + + x/28 = (x * ((0x100000000 / 28) + 1)) >> 32 + + mulhwu returns the top 32 bits of the 64 bit result, doing the + shift for us in the same instruction. The top 32 bits are undefined, + so we have to mask them. */ + + lis r6,FX_FRACTION_1_28@h + ori r6,r6,FX_FRACTION_1_28@l + mulhwu r5,r4,r6 + clrldi r5,r5,32 + + /* Get our pointer into the invpio4_table array. */ + sldi r4,r5,3 + addi r6,r9,(L(invpio4_table)-L(anchor)) + add r4,r4,r6 + + lfd fp2,0(r4) + lfd fp3,8(r4) + lfd fp4,16(r4) + lfd fp5,24(r4) + + fmul fp6,fp2,fp1 + fmul fp7,fp3,fp1 + fmul fp8,fp4,fp1 + fmul fp9,fp5,fp1 + + /* Mask off larger integer bits in highest double word that we don't + care about to avoid losing precision when combining with smaller + values. */ + fctiduz fp10,fp6 + mfvsrd r7,v10 + rldicr r7,r7,0,(63-INTEGER_BITS) + mtvsrd v10,r7 + fcfidu fp10,fp10 /* Integer bits. */ + + fsub fp6,fp6,fp10 /* highest -= integer bits */ + + /* Work out the integer component, rounded down. Use the top two + limbs for this. */ + fadd fp10,fp6,fp7 /* highest + higher */ + + fctiduz fp10,fp10 + mfvsrd r7,v10 + andi. r0,r7,1 + fcfidu fp10,fp10 + + /* Subtract integer component from highest limb. */ + fsub fp12,fp6,fp10 + + beq L(even_integer) + + /* Our integer component is odd, so we are in the -PI/4 to 0 primary + region. We need to shift our result down by PI/4, and to do this + in the mod (4/PI) space we simply subtract 1. */ + lfd fp11,(L(DPone)-L(anchor))(r9) + fsub fp12,fp12,fp11 + + /* Now add up all the limbs in order. */ + fadd fp12,fp12,fp7 + fadd fp12,fp12,fp8 + fadd fp12,fp12,fp9 + + /* And finally multiply by pi/4. */ + lfd fp13,(L(pio4)-L(anchor))(r9) + fmul fp1,fp12,fp13 + + addi r7,r7,1 + b L(reduced) + +L(even_integer): + lfd fp11,(L(DPone)-L(anchor))(r9) + + /* Now add up all the limbs in order. */ + fadd fp12,fp12,fp7 + fadd fp12,r12,fp8 + fadd fp12,r12,fp9 + + /* We need to check if the addition of all the limbs resulted in us + overflowing 1.0. */ + fcmpu 0,fp12,fp11 + bgt L(greater_than_one) + + /* And finally multiply by pi/4. */ + lfd fp13,(L(pio4)-L(anchor))(r9) + fmul fp1,fp12,fp13 + + addi r7,r7,1 + b L(reduced) + +L(greater_than_one): + /* We did overflow 1.0 when adding up all the limbs. Add 1.0 to our + integer, and subtract 1.0 from our result. Since that makes the + integer component odd, we need to subtract another 1.0 as + explained above. */ + addi r7,r7,1 + + lfd fp11,(L(DPtwo)-L(anchor))(r9) + fsub fp12,fp12,fp11 + + /* And finally multiply by pi/4. */ + lfd fp13,(L(pio4)-L(anchor))(r9) + fmul fp1,fp12,fp13 + + addi r7,r7,1 + b L(reduced) + + .balign 16 +L(less_2pn5): + lis r4,TWO_PN27@h + ori r4,r4,TWO_PN27@l + + cmpw r3,r4 + blt L(less_2pn27) + + /* A simpler Chebyshev approximation is close enough for this range: + 1.0+x^2*(CC0+x^3*CC1). */ + + lfd fp10,(L(CC0)-L(anchor))(r9) + lfd fp11,(L(CC1)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + fmul fp3,fp2,fp1 /* x^3 */ + lfd fp1,(L(DPone)-L(anchor))(r9) + + fmadd fp4,fp3,fp11,fp10 /* CC0+x^3*CC1 */ + fmadd fp1,fp2,fp4,fp1 /* 1.0+x^2*(CC0+x^3*CC1) */ + + frsp fp1,fp1 /* Round to single precision. */ + + blr + + .balign 16 +L(less_2pn27): + /* Handle some special cases: + + cosf(subnormal) raises inexact + cosf(min_normalized) raises inexact + cosf(normalized) raises inexact. */ + + lfd fp2,(L(DPone)-L(anchor))(r9) + + fabs fp1,fp1 /* |x| */ + fsub fp1,fp2,fp1 /* 1.0-|x| */ + + frsp fp1,fp1 + + blr + +END (__cosf) + + .section .rodata, "a" + + .balign 8 + +L(anchor): + + /* Chebyshev constants for sin, range -PI/4 - PI/4. */ +L(S0): .8byte 0xbfc5555555551cd9 +L(S1): .8byte 0x3f81111110c2688b +L(S2): .8byte 0xbf2a019f8b4bd1f9 +L(S3): .8byte 0x3ec71d7264e6b5b4 +L(S4): .8byte 0xbe5a947e1674b58a + + /* Chebyshev constants for cos, range 2^-27 - 2^-5. */ +L(CC0): .8byte 0xbfdfffffff5cc6fd +L(CC1): .8byte 0x3fa55514b178dac5 + + /* Chebyshev constants for cos, range -PI/4 - PI/4. */ +L(C0): .8byte 0xbfdffffffffe98ae +L(C1): .8byte 0x3fa55555545c50c7 +L(C2): .8byte 0xbf56c16b348b6874 +L(C3): .8byte 0x3efa00eb9ac43cc0 +L(C4): .8byte 0xbe923c97dd8844d7 + +L(invpio2): + .8byte 0x3fe45f306dc9c883 /* 2/PI */ + +L(invpio4): + .8byte 0x3ff45f306dc9c883 /* 4/PI */ + +L(invpio4_table): + .8byte 0x0000000000000000 + .8byte 0x3ff45f306c000000 + .8byte 0x3e3c9c882a000000 + .8byte 0x3c54fe13a8000000 + .8byte 0x3aaf47d4d0000000 + .8byte 0x38fbb81b6c000000 + .8byte 0x3714acc9e0000000 + .8byte 0x3560e4107c000000 + .8byte 0x33bca2c756000000 + .8byte 0x31fbd778ac000000 + .8byte 0x300b7246e0000000 + .8byte 0x2e5d2126e8000000 + .8byte 0x2c97003248000000 + .8byte 0x2ad77504e8000000 + .8byte 0x290921cfe0000000 + .8byte 0x274deb1cb0000000 + .8byte 0x25829a73e0000000 + .8byte 0x23fd1046be000000 + .8byte 0x2224baed10000000 + .8byte 0x20709d338e000000 + .8byte 0x1e535a2f80000000 + .8byte 0x1cef904e64000000 + .8byte 0x1b0d639830000000 + .8byte 0x1964ce7d24000000 + .8byte 0x17b908bf16000000 + +L(pio4): + .8byte 0x3fe921fb54442d18 /* PI/4 */ + +/* PI/2 as a sum of two doubles. We only use 32 bits of the upper limb + to avoid losing significant bits when multiplying with up to + (2^22)/(pi/2). */ +L(pio2hi): + .8byte 0xbff921fb54400000 + +L(pio2lo): + .8byte 0xbdd0b4611a626332 + +L(pio2_table): + .8byte 0 + .8byte 0x3ff921fb54442d18 /* 1 * PI/2 */ + .8byte 0x400921fb54442d18 /* 2 * PI/2 */ + .8byte 0x4012d97c7f3321d2 /* 3 * PI/2 */ + .8byte 0x401921fb54442d18 /* 4 * PI/2 */ + .8byte 0x401f6a7a2955385e /* 5 * PI/2 */ + .8byte 0x4022d97c7f3321d2 /* 6 * PI/2 */ + .8byte 0x4025fdbbe9bba775 /* 7 * PI/2 */ + .8byte 0x402921fb54442d18 /* 8 * PI/2 */ + .8byte 0x402c463abeccb2bb /* 9 * PI/2 */ + .8byte 0x402f6a7a2955385e /* 10 * PI/2 */ + +L(small): + .8byte 0x3cd0000000000000 /* 2^-50 */ + +L(ones): + .8byte 0x3ff0000000000000 /* +1.0 */ + .8byte 0xbff0000000000000 /* -1.0 */ + +L(DPhalf): + .8byte 0x3fe0000000000000 /* 0.5 */ + +L(DPone): + .8byte 0x3ff0000000000000 /* 1.0 */ + +L(DPtwo): + .8byte 0x4000000000000000 /* 2.0 */ + +weak_alias(__cosf, cosf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S new file mode 100644 index 0000000000..fcdcb60293 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S @@ -0,0 +1,56 @@ +/* isfinite(). PowerPC64/POWER8 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#define MFVSRD_R3_V1 .long 0x7c230066 /* mfvsrd r3,vs1 */ + +/* int [r3] __finite ([fp1] x) */ + +EALIGN (__finite, 4, 0) + CALL_MCOUNT 0 + MFVSRD_R3_V1 + lis r9,0x8010 + clrldi r3,r3,1 /* r3 = r3 & 0x8000000000000000 */ + rldicr r9,r9,32,31 /* r9 = (r9 << 32) & 0xffffffff */ + add r3,r3,r9 + rldicl r3,r3,1,63 + blr +END (__finite) + +hidden_def (__finite) +weak_alias (__finite, finite) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__finite, __finitef) +hidden_def (__finitef) +weak_alias (__finitef, finitef) + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, __finite, __finitel, GLIBC_2_0) +compat_symbol (libm, finite, finitel, GLIBC_2_0) +# endif +#else +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __finite, __finitel, GLIBC_2_0); +compat_symbol (libc, finite, finitel, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S new file mode 100644 index 0000000000..54bd94176d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S @@ -0,0 +1 @@ +/* This function uses the same code as s_finite.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S new file mode 100644 index 0000000000..32814e4525 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S @@ -0,0 +1,61 @@ +/* isinf(). PowerPC64/POWER8 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#define MFVSRD_R3_V1 .long 0x7c230066 /* mfvsrd r3,vs1 */ + +/* int [r3] __isinf([fp1] x) */ + +EALIGN (__isinf, 4, 0) + CALL_MCOUNT 0 + MFVSRD_R3_V1 + lis r9,0x7ff0 /* r9 = 0x7ff0 */ + rldicl r10,r3,0,1 /* r10 = r3 & (0x8000000000000000) */ + sldi r9,r9,32 /* r9 = r9 << 52 */ + cmpd cr7,r10,r9 /* fp1 & 0x7ff0000000000000 ? */ + beq cr7,L(inf) + li r3,0 /* Not inf */ + blr +L(inf): + sradi r3,r3,63 /* r3 = r3 >> 63 */ + ori r3,r3,1 /* r3 = r3 | 0x1 */ + blr +END (__isinf) + +hidden_def (__isinf) +weak_alias (__isinf, isinf) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isinf, __isinff) +hidden_def (__isinff) +weak_alias (__isinff, isinff) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isinf, __isinfl) +weak_alias (__isinf, isinfl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0); +compat_symbol (libc, isinf, isinfl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S new file mode 100644 index 0000000000..be759e091e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isinf.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S new file mode 100644 index 0000000000..af52e502b7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S @@ -0,0 +1,56 @@ +/* isnan(). PowerPC64/POWER8 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#define MFVSRD_R3_V1 .long 0x7c230066 /* mfvsrd r3,vs1 */ + +/* int [r3] __isnan([f1] x) */ + +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + MFVSRD_R3_V1 + lis r9,0x7ff0 + clrldi r3,r3,1 /* r3 = r3 & 0x8000000000000000 */ + rldicr r9,r9,32,31 /* r9 = (r9 << 32) & 0xffffffff */ + subf r3,r3,r9 + rldicl r3,r3,1,63 + blr +END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S new file mode 100644 index 0000000000..b48c85e0d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isnan.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S new file mode 100644 index 0000000000..aa180b6901 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S @@ -0,0 +1,45 @@ +/* Round double to long int. POWER8 PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#define MFVSRD_R3_V1 .long 0x7c230066 /* mfvsrd r3,vs1 */ + +/* long long int[r3] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT 0 + fctid fp1,fp1 + MFVSRD_R3_V1 + blr +END (__llrint) + +strong_alias (__llrint, __lrint) +weak_alias (__llrint, llrint) +weak_alias (__lrint, lrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S new file mode 100644 index 0000000000..043fc6a089 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S @@ -0,0 +1,48 @@ +/* llround function. POWER8 PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <endian.h> +#include <math_ldbl_opt.h> + +#define MFVSRD_R3_V1 .long 0x7c230066 /* mfvsrd r3,vs1 */ + +/* long long [r3] llround (float x [fp1]) */ + +ENTRY (__llround) + CALL_MCOUNT 0 + frin fp1,fp1 /* Round to nearest +-0.5. */ + fctidz fp1,fp1 /* Convert To Integer DW round toward 0. */ + MFVSRD_R3_V1 + blr +END (__llround) + +strong_alias (__llround, __lround) +weak_alias (__llround, llround) +weak_alias (__lround, lround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S new file mode 100644 index 0000000000..fb0add3462 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S @@ -0,0 +1,519 @@ +/* Optimized sinf(). PowerPC64/POWER8 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#define _ERRNO_H 1 +#include <bits/errno.h> + +#define FRAMESIZE (FRAME_MIN_SIZE+16) + +#define FLOAT_EXPONENT_SHIFT 23 +#define FLOAT_EXPONENT_BIAS 127 +#define INTEGER_BITS 3 + +#define PI_4 0x3f490fdb /* PI/4 */ +#define NINEPI_4 0x40e231d6 /* 9 * PI/4 */ +#define TWO_PN5 0x3d000000 /* 2^-5 */ +#define TWO_PN27 0x32000000 /* 2^-27 */ +#define INFINITY 0x7f800000 +#define TWO_P23 0x4b000000 /* 2^27 */ +#define FX_FRACTION_1_28 0x9249250 /* 0x100000000 / 28 + 1 */ + + /* Implements the function + + float [fp1] sinf (float [fp1] x) */ + + .machine power8 +EALIGN(__sinf, 4, 0) + addis r9,r2,L(anchor)@toc@ha + addi r9,r9,L(anchor)@toc@l + + lis r4,PI_4@h + ori r4,r4,PI_4@l + + xscvdpspn v0,v1 + mfvsrd r8,v0 + rldicl r3,r8,32,33 /* Remove sign bit. */ + + cmpw r3,r4 + bge L(greater_or_equal_pio4) + + lis r4,TWO_PN5@h + ori r4,r4,TWO_PN5@l + + cmpw r3,r4 + blt L(less_2pn5) + + /* Chebyshev polynomial of the form: + * x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */ + + lfd fp9,(L(S0)-L(anchor))(r9) + lfd fp10,(L(S1)-L(anchor))(r9) + lfd fp11,(L(S2)-L(anchor))(r9) + lfd fp12,(L(S3)-L(anchor))(r9) + lfd fp13,(L(S4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + fmul fp3,fp2,fp1 /* x^3 */ + + fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */ + fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */ + fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */ + fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */ + fmadd fp1,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */ + frsp fp1,fp1 /* Round to single precision. */ + + blr + + .balign 16 +L(greater_or_equal_pio4): + lis r4,NINEPI_4@h + ori r4,r4,NINEPI_4@l + cmpw r3,r4 + bge L(greater_or_equal_9pio4) + + /* Calculate quotient of |x|/(PI/4). */ + lfd fp2,(L(invpio4)-L(anchor))(r9) + fabs fp1,fp1 /* |x| */ + fmul fp2,fp1,fp2 /* |x|/(PI/4) */ + fctiduz fp2,fp2 + mfvsrd r3,v2 /* n = |x| mod PI/4 */ + + /* Now use that quotient to find |x| mod (PI/2). */ + addi r7,r3,1 + rldicr r5,r7,2,60 /* ((n+1) >> 1) << 3 */ + addi r6,r9,(L(pio2_table)-L(anchor)) + lfdx fp4,r5,r6 + fsub fp1,fp1,fp4 + + .balign 16 +L(reduced): + /* Now we are in the range -PI/4 to PI/4. */ + + /* Work out if we are in a positive or negative primary interval. */ + rldicl r4,r7,62,63 /* ((n+1) >> 2) & 1 */ + + /* We are operating on |x|, so we need to add back the original + sign. */ + rldicl r8,r8,33,63 /* (x >> 31) & 1, ie the sign bit. */ + xor r4,r4,r8 /* 0 if result should be positive, + 1 if negative. */ + + /* Load a 1.0 or -1.0. */ + addi r5,r9,(L(ones)-L(anchor)) + sldi r4,r4,3 + lfdx fp0,r4,r5 + + /* Are we in the primary interval of sin or cos? */ + andi. r4,r7,0x2 + bne L(cos) + + /* Chebyshev polynomial of the form: + x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */ + + lfd fp9,(L(S0)-L(anchor))(r9) + lfd fp10,(L(S1)-L(anchor))(r9) + lfd fp11,(L(S2)-L(anchor))(r9) + lfd fp12,(L(S3)-L(anchor))(r9) + lfd fp13,(L(S4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + fmul fp3,fp2,fp1 /* x^3 */ + + fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */ + fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */ + fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */ + fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */ + fmadd fp4,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */ + fmul fp4,fp4,fp0 /* Add in the sign. */ + frsp fp1,fp4 /* Round to single precision. */ + + blr + + .balign 16 +L(cos): + /* Chebyshev polynomial of the form: + 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). */ + + lfd fp9,(L(C0)-L(anchor))(r9) + lfd fp10,(L(C1)-L(anchor))(r9) + lfd fp11,(L(C2)-L(anchor))(r9) + lfd fp12,(L(C3)-L(anchor))(r9) + lfd fp13,(L(C4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + lfd fp3,(L(DPone)-L(anchor))(r9) + + fmadd fp4,fp2,fp13,fp12 /* C3+x^2*C4 */ + fmadd fp4,fp2,fp4,fp11 /* C2+x^2*(C3+x^2*C4) */ + fmadd fp4,fp2,fp4,fp10 /* C1+x^2*(C2+x^2*(C3+x^2*C4)) */ + fmadd fp4,fp2,fp4,fp9 /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */ + fmadd fp4,fp2,fp4,fp3 /* 1.0 + x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */ + fmul fp4,fp4,fp0 /* Add in the sign. */ + frsp fp1,fp4 /* Round to single precision. */ + + blr + + .balign 16 +L(greater_or_equal_9pio4): + lis r4,INFINITY@h + ori r4,r4,INFINITY@l + cmpw r3,r4 + bge L(inf_or_nan) + + lis r4,TWO_P23@h + ori r4,r4,TWO_P23@l + cmpw r3,r4 + bge L(greater_or_equal_2p23) + + fabs fp1,fp1 /* |x| */ + + /* Calculate quotient of |x|/(PI/4). */ + lfd fp2,(L(invpio4)-L(anchor))(r9) + + lfd fp3,(L(DPone)-L(anchor))(r9) + lfd fp4,(L(DPhalf)-L(anchor))(r9) + fmul fp2,fp1,fp2 /* |x|/(PI/4) */ + friz fp2,fp2 /* n = floor(|x|/(PI/4)) */ + + /* Calculate (n + 1) / 2. */ + fadd fp2,fp2,fp3 /* n + 1 */ + fmul fp3,fp2,fp4 /* (n + 1) / 2 */ + friz fp3,fp3 + + lfd fp4,(L(pio2hi)-L(anchor))(r9) + lfd fp5,(L(pio2lo)-L(anchor))(r9) + + fmul fp6,fp4,fp3 + fadd fp6,fp6,fp1 + fmadd fp1,fp5,fp3,fp6 + + fctiduz fp2,fp2 + mfvsrd r7,v2 /* n + 1 */ + + b L(reduced) + + .balign 16 +L(inf_or_nan): + bne L(skip_errno_setting) /* Is a NAN? */ + + /* We delayed the creation of the stack frame, as well as the saving of + the link register, because only at this point, we are sure that + doing so is actually needed. */ + + stfd fp1,-8(r1) + + /* Save the link register. */ + mflr r0 + std r0,16(r1) + cfi_offset(lr, 16) + + /* Create the stack frame. */ + stdu r1,-FRAMESIZE(r1) + cfi_adjust_cfa_offset(FRAMESIZE) + + bl JUMPTARGET(__errno_location) + nop + + /* Restore the stack frame. */ + addi r1,r1,FRAMESIZE + cfi_adjust_cfa_offset(-FRAMESIZE) + /* Restore the link register. */ + ld r0,16(r1) + mtlr r0 + + lfd fp1,-8(r1) + + /* errno = EDOM */ + li r4,EDOM + stw r4,0(r3) + +L(skip_errno_setting): + fsub fp1,fp1,fp1 /* x - x */ + blr + + .balign 16 +L(greater_or_equal_2p23): + fabs fp1,fp1 + + srwi r4,r3,FLOAT_EXPONENT_SHIFT + subi r4,r4,FLOAT_EXPONENT_BIAS + + /* We reduce the input modulo pi/4, so we need 3 bits of integer + to determine where in 2*pi we are. Index into our array + accordingly. */ + addi r4,r4,INTEGER_BITS + + /* To avoid an expensive divide, for the range we care about (0 - 127) + we can transform x/28 into: + + x/28 = (x * ((0x100000000 / 28) + 1)) >> 32 + + mulhwu returns the top 32 bits of the 64 bit result, doing the + shift for us in the same instruction. The top 32 bits are undefined, + so we have to mask them. */ + + lis r6,FX_FRACTION_1_28@h + ori r6,r6,FX_FRACTION_1_28@l + mulhwu r5,r4,r6 + clrldi r5,r5,32 + + /* Get our pointer into the invpio4_table array. */ + sldi r4,r5,3 + addi r6,r9,(L(invpio4_table)-L(anchor)) + add r4,r4,r6 + + lfd fp2,0(r4) + lfd fp3,8(r4) + lfd fp4,16(r4) + lfd fp5,24(r4) + + fmul fp6,fp2,fp1 + fmul fp7,fp3,fp1 + fmul fp8,fp4,fp1 + fmul fp9,fp5,fp1 + + /* Mask off larger integer bits in highest double word that we don't + care about to avoid losing precision when combining with smaller + values. */ + fctiduz fp10,fp6 + mfvsrd r7,v10 + rldicr r7,r7,0,(63-INTEGER_BITS) + mtvsrd v10,r7 + fcfidu fp10,fp10 /* Integer bits. */ + + fsub fp6,fp6,fp10 /* highest -= integer bits */ + + /* Work out the integer component, rounded down. Use the top two + limbs for this. */ + fadd fp10,fp6,fp7 /* highest + higher */ + + fctiduz fp10,fp10 + mfvsrd r7,v10 + andi. r0,r7,1 + fcfidu fp10,fp10 + + /* Subtract integer component from highest limb. */ + fsub fp12,fp6,fp10 + + beq L(even_integer) + + /* Our integer component is odd, so we are in the -PI/4 to 0 primary + region. We need to shift our result down by PI/4, and to do this + in the mod (4/PI) space we simply subtract 1. */ + lfd fp11,(L(DPone)-L(anchor))(r9) + fsub fp12,fp12,fp11 + + /* Now add up all the limbs in order. */ + fadd fp12,fp12,fp7 + fadd fp12,fp12,fp8 + fadd fp12,fp12,fp9 + + /* And finally multiply by pi/4. */ + lfd fp13,(L(pio4)-L(anchor))(r9) + fmul fp1,fp12,fp13 + + addi r7,r7,1 + b L(reduced) + +L(even_integer): + lfd fp11,(L(DPone)-L(anchor))(r9) + + /* Now add up all the limbs in order. */ + fadd fp12,fp12,fp7 + fadd fp12,r12,fp8 + fadd fp12,r12,fp9 + + /* We need to check if the addition of all the limbs resulted in us + overflowing 1.0. */ + fcmpu 0,fp12,fp11 + bgt L(greater_than_one) + + /* And finally multiply by pi/4. */ + lfd fp13,(L(pio4)-L(anchor))(r9) + fmul fp1,fp12,fp13 + + addi r7,r7,1 + b L(reduced) + +L(greater_than_one): + /* We did overflow 1.0 when adding up all the limbs. Add 1.0 to our + integer, and subtract 1.0 from our result. Since that makes the + integer component odd, we need to subtract another 1.0 as + explained above. */ + addi r7,r7,1 + + lfd fp11,(L(DPtwo)-L(anchor))(r9) + fsub fp12,fp12,fp11 + + /* And finally multiply by pi/4. */ + lfd fp13,(L(pio4)-L(anchor))(r9) + fmul fp1,fp12,fp13 + + addi r7,r7,1 + b L(reduced) + + .balign 16 +L(less_2pn5): + lis r4,TWO_PN27@h + ori r4,r4,TWO_PN27@l + + cmpw r3,r4 + blt L(less_2pn27) + + /* A simpler Chebyshev approximation is close enough for this range: + x+x^3*(SS0+x^2*SS1). */ + + lfd fp10,(L(SS0)-L(anchor))(r9) + lfd fp11,(L(SS1)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + fmul fp3,fp2,fp1 /* x^3 */ + + fmadd fp4,fp2,fp11,fp10 /* SS0+x^2*SS1 */ + fmadd fp1,fp3,fp4,fp1 /* x+x^3*(SS0+x^2*SS1) */ + + frsp fp1,fp1 /* Round to single precision. */ + + blr + + .balign 16 +L(less_2pn27): + cmpwi r3,0 + beq L(zero) + + /* Handle some special cases: + + sinf(subnormal) raises inexact/underflow + sinf(min_normalized) raises inexact/underflow + sinf(normalized) raises inexact. */ + + lfd fp2,(L(small)-L(anchor))(r9) + + fmul fp2,fp1,fp2 /* x * small */ + fsub fp1,fp1,fp2 /* x - x * small */ + + frsp fp1,fp1 + + blr + + .balign 16 +L(zero): + blr + +END (__sinf) + + .section .rodata, "a" + + .balign 8 + +L(anchor): + + /* Chebyshev constants for sin, range -PI/4 - PI/4. */ +L(S0): .8byte 0xbfc5555555551cd9 +L(S1): .8byte 0x3f81111110c2688b +L(S2): .8byte 0xbf2a019f8b4bd1f9 +L(S3): .8byte 0x3ec71d7264e6b5b4 +L(S4): .8byte 0xbe5a947e1674b58a + + /* Chebyshev constants for sin, range 2^-27 - 2^-5. */ +L(SS0): .8byte 0xbfc555555543d49d +L(SS1): .8byte 0x3f8110f475cec8c5 + + /* Chebyshev constants for cos, range -PI/4 - PI/4. */ +L(C0): .8byte 0xbfdffffffffe98ae +L(C1): .8byte 0x3fa55555545c50c7 +L(C2): .8byte 0xbf56c16b348b6874 +L(C3): .8byte 0x3efa00eb9ac43cc0 +L(C4): .8byte 0xbe923c97dd8844d7 + +L(invpio2): + .8byte 0x3fe45f306dc9c883 /* 2/PI */ + +L(invpio4): + .8byte 0x3ff45f306dc9c883 /* 4/PI */ + +L(invpio4_table): + .8byte 0x0000000000000000 + .8byte 0x3ff45f306c000000 + .8byte 0x3e3c9c882a000000 + .8byte 0x3c54fe13a8000000 + .8byte 0x3aaf47d4d0000000 + .8byte 0x38fbb81b6c000000 + .8byte 0x3714acc9e0000000 + .8byte 0x3560e4107c000000 + .8byte 0x33bca2c756000000 + .8byte 0x31fbd778ac000000 + .8byte 0x300b7246e0000000 + .8byte 0x2e5d2126e8000000 + .8byte 0x2c97003248000000 + .8byte 0x2ad77504e8000000 + .8byte 0x290921cfe0000000 + .8byte 0x274deb1cb0000000 + .8byte 0x25829a73e0000000 + .8byte 0x23fd1046be000000 + .8byte 0x2224baed10000000 + .8byte 0x20709d338e000000 + .8byte 0x1e535a2f80000000 + .8byte 0x1cef904e64000000 + .8byte 0x1b0d639830000000 + .8byte 0x1964ce7d24000000 + .8byte 0x17b908bf16000000 + +L(pio4): + .8byte 0x3fe921fb54442d18 /* PI/4 */ + +/* PI/2 as a sum of two doubles. We only use 32 bits of the upper limb + to avoid losing significant bits when multiplying with up to + (2^22)/(pi/2). */ +L(pio2hi): + .8byte 0xbff921fb54400000 + +L(pio2lo): + .8byte 0xbdd0b4611a626332 + +L(pio2_table): + .8byte 0 + .8byte 0x3ff921fb54442d18 /* 1 * PI/2 */ + .8byte 0x400921fb54442d18 /* 2 * PI/2 */ + .8byte 0x4012d97c7f3321d2 /* 3 * PI/2 */ + .8byte 0x401921fb54442d18 /* 4 * PI/2 */ + .8byte 0x401f6a7a2955385e /* 5 * PI/2 */ + .8byte 0x4022d97c7f3321d2 /* 6 * PI/2 */ + .8byte 0x4025fdbbe9bba775 /* 7 * PI/2 */ + .8byte 0x402921fb54442d18 /* 8 * PI/2 */ + .8byte 0x402c463abeccb2bb /* 9 * PI/2 */ + .8byte 0x402f6a7a2955385e /* 10 * PI/2 */ + +L(small): + .8byte 0x3cd0000000000000 /* 2^-50 */ + +L(ones): + .8byte 0x3ff0000000000000 /* +1.0 */ + .8byte 0xbff0000000000000 /* -1.0 */ + +L(DPhalf): + .8byte 0x3fe0000000000000 /* 0.5 */ + +L(DPone): + .8byte 0x3ff0000000000000 /* 1.0 */ + +L(DPtwo): + .8byte 0x4000000000000000 /* 2.0 */ + +weak_alias(__sinf, sinf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memcmp.S new file mode 100644 index 0000000000..46b9c0067a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memcmp.S @@ -0,0 +1,1447 @@ +/* Optimized memcmp implementation for POWER7/PowerPC64. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) +#ifndef MEMCMP +# define MEMCMP memcmp +#endif + .machine power7 +EALIGN (MEMCMP, 4, 0) + CALL_MCOUNT 3 + +#define rRTN r3 +#define rSTR1 r3 /* First string arg. */ +#define rSTR2 r4 /* Second string arg. */ +#define rN r5 /* Max string length. */ +#define rWORD1 r6 /* Current word in s1. */ +#define rWORD2 r7 /* Current word in s2. */ +#define rWORD3 r8 /* Next word in s1. */ +#define rWORD4 r9 /* Next word in s2. */ +#define rWORD5 r10 /* Next word in s1. */ +#define rWORD6 r11 /* Next word in s2. */ + +#define rOFF8 r20 /* 8 bytes offset. */ +#define rOFF16 r21 /* 16 bytes offset. */ +#define rOFF24 r22 /* 24 bytes offset. */ +#define rOFF32 r23 /* 24 bytes offset. */ +#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */ +#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ +#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ +#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ +#define rSHR r28 /* Unaligned shift right count. */ +#define rSHL r29 /* Unaligned shift left count. */ +#define rWORD7 r30 /* Next word in s1. */ +#define rWORD8 r31 /* Next word in s2. */ + +#define rWORD8SAVE (-8) +#define rWORD7SAVE (-16) +#define rOFF8SAVE (-24) +#define rOFF16SAVE (-32) +#define rOFF24SAVE (-40) +#define rOFF32SAVE (-48) +#define rSHRSAVE (-56) +#define rSHLSAVE (-64) +#define rWORD8SHIFTSAVE (-72) +#define rWORD2SHIFTSAVE (-80) +#define rWORD4SHIFTSAVE (-88) +#define rWORD6SHIFTSAVE (-96) + +#ifdef __LITTLE_ENDIAN__ +# define LD ldbrx +#else +# define LD ldx +#endif + + xor r10, rSTR2, rSTR1 + cmpldi cr6, rN, 0 + cmpldi cr1, rN, 8 + clrldi. r0, r10, 61 + clrldi r12, rSTR1, 61 + cmpldi cr5, r12, 0 + beq- cr6, L(zeroLength) + dcbt 0, rSTR1 + dcbt 0, rSTR2 + /* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) + bne L(unalignedqw) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then we are already double word + aligned and can perform the DW aligned loop. */ + + .align 4 +L(samealignment): + or r11, rSTR2, rSTR1 + clrldi. r11, r11, 60 + beq L(qw_align) + /* Try to align to QW else proceed to DW loop. */ + clrldi. r10, r10, 60 + bne L(DW) + /* For the difference to reach QW alignment, load as DW. */ + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + subfic r10, r12, 8 + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + sldi r9, r10, 3 + subfic r9, r9, 64 + sld rWORD1, rWORD1, r9 + sld rWORD2, rWORD2, r9 + cmpld cr6, rWORD1, rWORD2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(ret_diff) + subf rN, r10, rN + + cmpld cr6, r11, r12 + bgt cr6, L(qw_align) + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpld cr6, rWORD1, rWORD2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(different) + cmpldi cr6, rN, 8 + ble cr6, L(zeroLength) + addi rN, rN, -8 + /* Now both rSTR1 and rSTR2 are aligned to QW. */ + .align 4 +L(qw_align): + vspltisb v0, 0 + srdi. r6, rN, 6 + li r8, 16 + li r10, 32 + li r11, 48 + ble cr0, L(lessthan64) + mtctr r6 + vspltisb v8, 0 + vspltisb v6, 0 + /* Aligned vector loop. */ + .align 4 +L(aligned_loop): + lvx v4, 0, rSTR1 + lvx v5, 0, rSTR2 + vcmpequb. v7, v6, v8 + bnl cr6, L(different3) + lvx v6, rSTR1, r8 + lvx v8, rSTR2, r8 + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + lvx v4, rSTR1, r10 + lvx v5, rSTR2, r10 + vcmpequb. v7, v6, v8 + bnl cr6, L(different3) + lvx v6, rSTR1, r11 + lvx v8, rSTR2, r11 + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + addi rSTR1, rSTR1, 64 + addi rSTR2, rSTR2, 64 + bdnz L(aligned_loop) + vcmpequb. v7, v6, v8 + bnl cr6, L(different3) + clrldi rN, rN, 58 + /* Handle remainder for aligned loop. */ + .align 4 +L(lessthan64): + mr r9, rSTR1 + cmpdi cr6, rN, 0 + li rSTR1, 0 + blelr cr6 + lvx v4, 0, r9 + lvx v5, 0, rSTR2 + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r8 + lvx v5, rSTR2, r8 + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r10 + lvx v5, rSTR2, r10 + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r11 + lvx v5, rSTR2, r11 + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + blr + + /* Calculate and return the difference. */ + .align 4 +L(different1): + cmpdi cr6, rN, 16 + bge cr6, L(different2) + /* Discard unwanted bytes. */ +#ifdef __LITTLE_ENDIAN__ + lvsr v1, 0, rN + vperm v4, v4, v0, v1 + vperm v5, v5, v0, v1 +#else + lvsl v1, 0, rN + vperm v4, v0, v4, v1 + vperm v5, v0, v5, v1 +#endif + vcmpequb. v7, v4, v5 + li rRTN, 0 + bltlr cr6 + .align 4 +L(different2): +#ifdef __LITTLE_ENDIAN__ + /* Reverse bytes for direct comparison. */ + lvsl v10, r0, r0 + vspltisb v8, 15 + vsububm v9, v8, v10 + vperm v4, v4, v0, v9 + vperm v5, v5, v0, v9 +#endif + MFVRD(r7, v4) + MFVRD(r9, v5) + cmpld cr6, r7, r9 + bne cr6, L(ret_diff) + /* Difference in second DW. */ + vsldoi v4, v4, v4, 8 + vsldoi v5, v5, v5, 8 + MFVRD(r7, v4) + MFVRD(r9, v5) + cmpld cr6, r7, r9 +L(ret_diff): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + .align 4 +L(different3): +#ifdef __LITTLE_ENDIAN__ + /* Reverse bytes for direct comparison. */ + vspltisb v9, 15 + lvsl v10, r0, r0 + vsububm v9, v9, v10 + vperm v6, v6, v0, v9 + vperm v8, v8, v0, v9 +#endif + MFVRD(r7, v6) + MFVRD(r9, v8) + cmpld cr6, r7, r9 + bne cr6, L(ret_diff) + /* Difference in second DW. */ + vsldoi v6, v6, v6, 8 + vsldoi v8, v8, v8, 8 + MFVRD(r7, v6) + MFVRD(r9, v8) + cmpld cr6, r7, r9 + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + + .align 4 +L(different): + cmpldi cr7, rN, 8 + bgt cr7, L(end) + /* Skip unwanted bytes. */ + sldi r8, rN, 3 + subfic r8, r8, 64 + srd rWORD1, rWORD1, r8 + srd rWORD2, rWORD2, r8 + cmpld cr6, rWORD1, rWORD2 + li rRTN, 0 + beqlr cr6 +L(end): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + + .align 4 +L(unalignedqw): + /* Proceed to DW unaligned loop,if there is a chance of pagecross. */ + rldicl r9, rSTR1, 0, 52 + add r9, r9, rN + cmpldi cr0, r9, 4096-16 + bgt cr0, L(unaligned) + rldicl r9, rSTR2, 0, 52 + add r9, r9, rN + cmpldi cr0, r9, 4096-16 + bgt cr0, L(unaligned) + li r0, 0 + li r8, 16 + vspltisb v0, 0 + /* Check if rSTR1 is aligned to QW. */ + andi. r11, rSTR1, 0xF + beq L(s1_align) + + /* Compare 16B and align S1 to QW. */ +#ifdef __LITTLE_ENDIAN__ + lvsr v10, 0, rSTR1 /* Compute mask. */ + lvsr v6, 0, rSTR2 /* Compute mask. */ +#else + lvsl v10, 0, rSTR1 /* Compute mask. */ + lvsl v6, 0, rSTR2 /* Compute mask. */ +#endif + lvx v5, 0, rSTR2 + lvx v9, rSTR2, r8 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v9, v5, v6 +#else + vperm v5, v5, v9, v6 +#endif + lvx v4, 0, rSTR1 + lvx v9, rSTR1, r8 +#ifdef __LITTLE_ENDIAN__ + vperm v4, v9, v4, v10 +#else + vperm v4, v4, v9, v10 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + cmpldi cr6, rN, 16 + ble cr6, L(zeroLength) + subfic r11, r11, 16 + subf rN, r11, rN + add rSTR1, rSTR1, r11 + add rSTR2, rSTR2, r11 + + /* As s1 is QW aligned prepare for unaligned loop. */ + .align 4 +L(s1_align): +#ifdef __LITTLE_ENDIAN__ + lvsr v6, 0, rSTR2 +#else + lvsl v6, 0, rSTR2 +#endif + lvx v5, 0, rSTR2 + srdi. r6, rN, 6 + li r10, 32 + li r11, 48 + ble cr0, L(lessthan64_unalign) + mtctr r6 + li r9, 64 + /* Unaligned vector loop. */ + .align 4 +L(unalign_qwloop): + lvx v4, 0, rSTR1 + lvx v10, rSTR2, r8 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + vor v5, v10, v10 + lvx v4, rSTR1, r8 + lvx v10, rSTR2, r10 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + vor v5, v10, v10 + lvx v4, rSTR1, r10 + lvx v10, rSTR2, r11 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + vor v5, v10, v10 + lvx v4, rSTR1, r11 + lvx v10, rSTR2, r9 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + vor v5, v10, v10 + addi rSTR1, rSTR1, 64 + addi rSTR2, rSTR2, 64 + bdnz L(unalign_qwloop) + clrldi rN, rN, 58 + /* Handle remainder for unaligned loop. */ + .align 4 +L(lessthan64_unalign): + mr r9, rSTR1 + cmpdi cr6, rN, 0 + li rSTR1, 0 + blelr cr6 + lvx v4, 0, r9 + lvx v10, rSTR2, r8 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + vor v5, v10, v10 + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r8 + lvx v10, rSTR2, r10 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + vor v5, v10, v10 + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r10 + lvx v10, rSTR2, r11 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + vor v5, v10, v10 + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r11 + addi r11, r11, 16 + lvx v10, rSTR2, r11 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + blr + +/* Otherwise we know the two strings have the same alignment (but not + yet DW). So we force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DW aligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ + .align 4 +L(DW): + std rWORD8, rWORD8SAVE(r1) + std rWORD7, rWORD7SAVE(r1) + std rOFF8, rOFF8SAVE(r1) + std rOFF16, rOFF16SAVE(r1) + std rOFF24, rOFF24SAVE(r1) + std rOFF32, rOFF32SAVE(r1) + cfi_offset(rWORD8, rWORD8SAVE) + cfi_offset(rWORD7, rWORD7SAVE) + cfi_offset(rOFF8, rOFF8SAVE) + cfi_offset(rOFF16, rOFF16SAVE) + cfi_offset(rOFF24, rOFF24SAVE) + cfi_offset(rOFF32, rOFF32SAVE) + + li rOFF8,8 + li rOFF16,16 + li rOFF24,24 + li rOFF32,32 + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + beq cr5, L(DWaligned) + add rN, rN, r12 + sldi rWORD6, r12, 3 + srdi r0, rN, 5 /* Divide by 32. */ + andi. r12, rN, 24 /* Get the DW remainder. */ + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dPs4) + mtctr r0 + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + +/* Remainder is 8. */ + .align 3 +L(dsP1): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 + b L(dP1e) +/* Remainder is 16. */ + .align 4 +L(dPs2): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. */ + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 + b L(dP2e) +/* Remainder is 24. */ + .align 4 +L(dPs3): + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD2, rWORD6 + cmpld cr1, rWORD3, rWORD4 + b L(dP3e) +/* Count is a multiple of 32, remainder is 0. */ + .align 4 +L(dPs4): + mtctr r0 + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD2, rWORD6 + cmpld cr7, rWORD1, rWORD2 + b L(dP4e) + +/* At this point we know both strings are double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWaligned): + andi. r12, rN, 24 /* Get the DW remainder. */ + srdi r0, rN, 5 /* Divide by 32. */ + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) + +/* Remainder is 8. */ + .align 4 +L(dP1): + mtctr r0 +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ + LD rWORD5, 0, rSTR1 + LD rWORD6, 0, rSTR2 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 +L(dP1e): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr1, rWORD3, rWORD4 + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5x) + bne cr7, L(dLcr7x) + + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + bne cr1, L(dLcr1) + cmpld cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + .align 3 +L(dP1x): + sldi. r12, rN, 3 + bne cr5, L(dLcr5x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Remainder is 16. */ + .align 4 +L(dP2): + mtctr r0 + LD rWORD5, 0, rSTR1 + LD rWORD6, 0, rSTR2 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 +L(dP2e): + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + LD rWORD3, rOFF24, rSTR1 + LD rWORD4, rOFF24, rSTR2 + cmpld cr1, rWORD3, rWORD4 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) + .align 4 +L(dP2x): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + sldi. r12, rN, 3 + bne cr6, L(dLcr6x) + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr1, L(dLcr1x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Remainder is 24. */ + .align 4 +L(dP3): + mtctr r0 + LD rWORD3, 0, rSTR1 + LD rWORD4, 0, rSTR2 + cmpld cr1, rWORD3, rWORD4 +L(dP3e): + LD rWORD5, rOFF8, rSTR1 + LD rWORD6, rOFF8, rSTR2 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) + LD rWORD7, rOFF16, rSTR1 + LD rWORD8, rOFF16, rSTR2 + cmpld cr5, rWORD7, rWORD8 + LD rWORD1, rOFF24, rSTR1 + LD rWORD2, rOFF24, rSTR2 + cmpld cr7, rWORD1, rWORD2 + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) +/* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP3x): + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + sldi. r12, rN, 3 + bne cr1, L(dLcr1x) + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + bne cr6, L(dLcr6x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne cr7, L(dLcr7x) + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Count is a multiple of 32, remainder is 0. */ + .align 4 +L(dP4): + mtctr r0 + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpld cr7, rWORD1, rWORD2 +L(dP4e): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + LD rWORD5, rOFF16, rSTR1 + LD rWORD6, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + LD rWORD7, rOFF24, rSTR1 + LD rWORD8, rOFF24, rSTR2 + addi rSTR1, rSTR1, 24 + addi rSTR2, rSTR2, 24 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4. */ +/* This is the primary loop. */ + .align 4 +L(dLoop): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +L(dLoop1): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) +L(dLoop2): + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) +L(dLoop3): + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + bne cr1, L(dLcr1) + cmpld cr7, rWORD1, rWORD2 + bdnz L(dLoop) + +L(dL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + cmpld cr5, rWORD7, rWORD8 +L(d44): + bne cr7, L(dLcr7) +L(d34): + bne cr1, L(dLcr1) +L(d24): + bne cr6, L(dLcr6) +L(d14): + sldi. r12, rN, 3 + bne cr5, L(dLcr5) +L(d04): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + beq L(duzeroLength) +/* At this point we have a remainder of 1 to 7 bytes to compare. Since + we are aligned it is safe to load the whole double word, and use + shift right double to eliminate bits beyond the compare length. */ +L(d00): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + cmpld cr7, rWORD1, rWORD2 + bne cr7, L(dLcr7x) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + + .align 4 +L(dLcr7): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr7x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(dLcr1): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr1x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr + .align 4 +L(dLcr6): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr6x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + .align 4 +L(dLcr5): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr5x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr5 + li rRTN, -1 + blr + + .align 4 +L(bytealigned): + mtctr rN + +/* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) is 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. Effectively + the first iteration of the loop only serves to load operands and + branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. */ + + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz L(b11) + cmpld cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz L(b12) + cmpld cr1, rWORD3, rWORD4 + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz L(b13) + .align 4 +L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + bne cr7, L(bLcr7) + + cmpld cr6, rWORD5, rWORD6 + bdz L(b3i) + + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + bne cr1, L(bLcr1) + + cmpld cr7, rWORD1, rWORD2 + bdz L(b2i) + + lbzu rWORD5, 1(rSTR1) + lbzu rWORD6, 1(rSTR2) + bne cr6, L(bLcr6) + + cmpld cr1, rWORD3, rWORD4 + bdnz L(bLoop) + +/* We speculatively loading bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. In this + case the loop will exit early (before the all pending bytes are + tested. In this case we must complete the pending operations + before returning. */ +L(b1i): + bne cr7, L(bLcr7) + bne cr1, L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne cr6, L(bLcr6) + bne cr7, L(bLcr7) + b L(bx34) + .align 4 +L(b3i): + bne cr1, L(bLcr1) + bne cr6, L(bLcr6) + b L(bx12) + .align 4 +L(bLcr7): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr +L(bLcr1): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr +L(bLcr6): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + +L(b13): + bne cr7, L(bx12) + bne cr1, L(bx34) +L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop +L(b12): + bne cr7, L(bx12) +L(bx34): + sub rRTN, rWORD3, rWORD4 + blr +L(b11): +L(bx12): + sub rRTN, rWORD1, rWORD2 + blr + + .align 4 +L(zeroLength): + li rRTN, 0 + blr + + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then rStr1 is double word + aligned and can perform the DWunaligned loop. + + Otherwise we know that rSTR1 is not already DW aligned yet. + So we can force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DWaligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected resister pair. */ +L(unaligned): + std rWORD8, rWORD8SAVE(r1) + std rWORD7, rWORD7SAVE(r1) + std rOFF8, rOFF8SAVE(r1) + std rOFF16, rOFF16SAVE(r1) + std rOFF24, rOFF24SAVE(r1) + std rOFF32, rOFF32SAVE(r1) + cfi_offset(rWORD8, rWORD8SAVE) + cfi_offset(rWORD7, rWORD7SAVE) + cfi_offset(rOFF8, rOFF8SAVE) + cfi_offset(rOFF16, rOFF16SAVE) + cfi_offset(rOFF24, rOFF24SAVE) + cfi_offset(rOFF32, rOFF32SAVE) + li rOFF8,8 + li rOFF16,16 + li rOFF24,24 + li rOFF32,32 + std rSHL, rSHLSAVE(r1) + cfi_offset(rSHL, rSHLSAVE) + clrldi rSHL, rSTR2, 61 + beq cr6, L(duzeroLength) + std rSHR, rSHRSAVE(r1) + cfi_offset(rSHR, rSHRSAVE) + beq cr5, L(DWunaligned) + std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 DW. */ + sub rWORD8_SHIFT, rSTR2, r12 +/* But do not attempt to address the DW before that DW that contains + the actual start of rSTR2. */ + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (DW aligned) start of rSTR1. */ + clrldi rSHL, rWORD8_SHIFT, 61 + clrrdi rSTR1, rSTR1, 3 + std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + sldi rSHL, rSHL, 3 + cmpld cr5, rWORD8_SHIFT, rSTR2 + add rN, rN, r12 + sldi rWORD6, r12, 3 + std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE) + cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE) + cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE) + subfic rSHR, rSHL, 64 + srdi r0, rN, 5 /* Divide by 32. */ + andi. r12, rN, 24 /* Get the DW remainder. */ +/* We normally need to load 2 DWs to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a DW where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. */ + li rWORD8, 0 + blt cr5, L(dus0) + LD rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 8 + sld rWORD8, rWORD8, rSHL + +L(dus0): + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + srd r12, rWORD2, rSHR + clrldi rN, rN, 61 + beq L(duPs4) + mtctr r0 + or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + +/* Remainder is 8. */ + .align 4 +L(dusP1): + sld rWORD8_SHIFT, rWORD2, rSHL + sld rWORD7, rWORD1, rWORD6 + sld rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16. */ + .align 4 +L(duPs2): + sld rWORD6_SHIFT, rWORD2, rSHL + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD8, rWORD6 + b L(duP2e) +/* Remainder is 24. */ + .align 4 +L(duPs3): + sld rWORD4_SHIFT, rWORD2, rSHL + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD8, rWORD6 + b L(duP3e) +/* Count is a multiple of 32, remainder is 0. */ + .align 4 +L(duPs4): + mtctr r0 + or rWORD8, r12, rWORD8 + sld rWORD2_SHIFT, rWORD2, rSHL + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD8, rWORD6 + b L(duP4e) + +/* At this point we know rSTR1 is double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWunaligned): + std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + srdi r0, rN, 5 /* Divide by 32. */ + std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + andi. r12, rN, 24 /* Get the DW remainder. */ + std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE) + cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE) + cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE) + cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE) + sldi rSHL, rSHL, 3 + LD rWORD6, 0, rSTR2 + LD rWORD8, rOFF8, rSTR2 + addi rSTR2, rSTR2, 8 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + subfic rSHR, rSHL, 64 + sld rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) + mtctr r0 + bgt cr1, L(duP3) + beq cr1, L(duP2) + +/* Remainder is 8. */ + .align 4 +L(duP1): + srd r12, rWORD8, rSHR + LD rWORD7, 0, rSTR1 + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) +L(duP1e): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) + or rWORD4, r12, rWORD2_SHIFT + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + bne cr7, L(duLcr7) + or rWORD6, r0, rWORD4_SHIFT + cmpld cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16. */ + .align 4 +L(duP2): + srd r0, rWORD8, rSHR + LD rWORD5, 0, rSTR1 + or rWORD6, r0, rWORD6_SHIFT + sld rWORD6_SHIFT, rWORD8, rSHL +L(duP2e): + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr6, rWORD5, rWORD6 + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + LD rWORD3, rOFF24, rSTR1 + LD rWORD4, rOFF24, rSTR2 + cmpld cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + cmpld cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmpld cr5, rWORD7, rWORD8 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Remainder is 24. */ + .align 4 +L(duP3): + srd r12, rWORD8, rSHR + LD rWORD3, 0, rSTR1 + sld rWORD4_SHIFT, rWORD8, rSHL + or rWORD4, r12, rWORD6_SHIFT +L(duP3e): + LD rWORD5, rOFF8, rSTR1 + LD rWORD6, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT + LD rWORD7, rOFF16, rSTR1 + LD rWORD8, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) + LD rWORD1, rOFF24, rSTR1 + LD rWORD2, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + cmpld cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Count is a multiple of 32, remainder is 0. */ + .align 4 +L(duP4): + mtctr r0 + srd r0, rWORD8, rSHR + LD rWORD1, 0, rSTR1 + sld rWORD2_SHIFT, rWORD8, rSHL + or rWORD2, r0, rWORD6_SHIFT +L(duP4e): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT + LD rWORD5, rOFF16, rSTR1 + LD rWORD6, rOFF16, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT + LD rWORD7, rOFF24, rSTR1 + LD rWORD8, rOFF24, rSTR2 + addi rSTR1, rSTR1, 24 + addi rSTR2, rSTR2, 24 + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + cmpld cr5, rWORD7, rWORD8 + bdz L(du24) /* Adjust CTR as we start with +4. */ +/* This is the primary loop. */ + .align 4 +L(duLoop): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +L(duLoop1): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +L(duLoop2): + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +L(duLoop3): + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + cmpld cr7, rWORD1, rWORD2 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + bdnz L(duLoop) + +L(duL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmpld cr5, rWORD7, rWORD8 +L(du44): + bne cr7, L(duLcr7) +L(du34): + bne cr1, L(duLcr1) +L(du24): + bne cr6, L(duLcr6) +L(du14): + sldi. rN, rN, 3 + bne cr5, L(duLcr5) +/* At this point we have a remainder of 1 to 7 bytes to compare. We use + shift right double to eliminate bits beyond the compare length. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rWORD8_SHIFT). */ + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + .align 4 +L(dutrim): + LD rWORD1, rOFF8, rSTR1 + ld rWORD8, -8(r1) + subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */ + or rWORD2, r0, rWORD8_SHIFT + ld rWORD7, rWORD7SAVE(r1) + ld rSHL, rSHLSAVE(r1) + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + ld rSHR, rSHRSAVE(r1) + ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + li rRTN, 0 + cmpld cr7, rWORD1, rWORD2 + ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + beq cr7, L(dureturn24) + li rRTN, 1 + ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(duLcr7): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr7, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr1): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr6): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr5): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + + .align 3 +L(duZeroReturn): + li rRTN, 0 + .align 4 +L(dureturn): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dureturn29): + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) +L(dureturn27): + ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) +L(dureturn24): + ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + blr + +L(duzeroLength): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +END (MEMCMP) +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp, bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memset.S new file mode 100644 index 0000000000..bc734c9f4f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memset.S @@ -0,0 +1,458 @@ +/* Optimized memset implementation for PowerPC64/POWER8. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MTVSRD_V1_R4 .long 0x7c240166 /* mtvsrd v1,r4 */ + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + + /* No need to use .machine power8 since mtvsrd is already + handled by the define. It avoid breakage on binutils + that does not support this machine specifier. */ + .machine power7 +EALIGN (MEMSET, 5, 0) + CALL_MCOUNT 3 + +L(_memset): + cmpldi cr7,r5,31 + neg r0,r3 + mr r10,r3 + + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 /* Replicate byte to word. */ + ble cr7,L(write_LT_32) + + andi. r11,r10,15 /* Check alignment of DST. */ + insrdi r4,r4,32,0 /* Replicate word to double word. */ + + beq L(big_aligned) + + mtocrf 0x01,r0 + clrldi r0,r0,60 + + /* Get DST aligned to 16 bytes. */ +1: bf 31,2f + stb r4,0(r10) + addi r10,r10,1 + +2: bf 30,4f + sth r4,0(r10) + addi r10,r10,2 + +4: bf 29,8f + stw r4,0(r10) + addi r10,r10,4 + +8: bf 28,16f + std r4,0(r10) + addi r10,r10,8 + +16: subf r5,r0,r5 + + .align 4 +L(big_aligned): + /* For sizes larger than 255 two possible paths: + - if constant is '0', zero full cache lines with dcbz + - otherwise uses vector instructions. */ + cmpldi cr5,r5,255 + dcbtst 0,r10 + cmpldi cr6,r4,0 + crand 27,26,21 + bt 27,L(huge_dcbz) + bge cr5,L(huge_vector) + + + /* Size between 32 and 255 bytes with constant different than 0, use + doubleword store instruction to achieve best throughput. */ + srdi r8,r5,5 + clrldi r11,r5,59 + cmpldi cr6,r11,0 + cmpdi r8,0 + beq L(tail_bytes) + mtctr r8 + + /* Main aligned write loop, writes 32-bytes at a time. */ + .align 4 +L(big_loop): + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + addi r10,r10,32 + bdz L(tail_bytes) + + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + addi r10,10,32 + bdnz L(big_loop) + + b L(tail_bytes) + + /* Write remaining 1~31 bytes. */ + .align 4 +L(tail_bytes): + beqlr cr6 + + srdi r7,r11,4 + clrldi r8,r11,60 + mtocrf 0x01,r7 + + .align 4 + bf 31,8f + std r4,0(r10) + std r4,8(r10) + addi r10,r10,16 + + .align 4 +8: mtocrf 0x1,r8 + bf 28,4f + std r4,0(r10) + addi r10,r10,8 + + .align 4 +4: bf 29,2f + stw 4,0(10) + addi 10,10,4 + + .align 4 +2: bf 30,1f + sth 4,0(10) + addi 10,10,2 + + .align 4 +1: bflr 31 + stb 4,0(10) + blr + + /* Size larger than 255 bytes with constant different than 0, use + vector instruction to achieve best throughput. */ +L(huge_vector): + /* Replicate set byte to quadword in VMX register. */ + MTVSRD_V1_R4 + xxpermdi 32,v0,v1,0 + vspltb v2,v0,15 + + /* Main aligned write loop: 128 bytes at a time. */ + li r6,16 + li r7,32 + li r8,48 + mtocrf 0x02,r5 + srdi r12,r5,7 + cmpdi r12,0 + beq L(aligned_tail) + mtctr r12 + b L(aligned_128loop) + + .align 4 +L(aligned_128loop): + stvx v2,0,r10 + stvx v2,r10,r6 + stvx v2,r10,r7 + stvx v2,r10,r8 + addi r10,r10,64 + stvx v2,0,r10 + stvx v2,r10,r6 + stvx v2,r10,r7 + stvx v2,r10,r8 + addi r10,r10,64 + bdnz L(aligned_128loop) + + /* Write remaining 1~127 bytes. */ +L(aligned_tail): + mtocrf 0x01,r5 + bf 25,32f + stvx v2,0,r10 + stvx v2,r10,r6 + stvx v2,r10,r7 + stvx v2,r10,r8 + addi r10,r10,64 + +32: bf 26,16f + stvx v2,0,r10 + stvx v2,r10,r6 + addi r10,r10,32 + +16: bf 27,8f + stvx v2,0,r10 + addi r10,r10,16 + +8: bf 28,4f + std r4,0(r10) + addi r10,r10,8 + + /* Copies 4~7 bytes. */ +4: bf 29,L(tail2) + stw r4,0(r10) + bf 30,L(tail5) + sth r4,4(r10) + bflr 31 + stb r4,6(r10) + /* Return original DST pointer. */ + blr + + /* Special case when value is 0 and we have a long length to deal + with. Use dcbz to zero out a full cacheline of 128 bytes at a time. + Before using dcbz though, we need to get the destination 128-byte + aligned. */ + .align 4 +L(huge_dcbz): + andi. r11,r10,127 + neg r0,r10 + beq L(huge_dcbz_aligned) + + clrldi r0,r0,57 + subf r5,r0,r5 + srdi r0,r0,3 + mtocrf 0x01,r0 + + /* Write 1~128 bytes until DST is aligned to 128 bytes. */ +8: bf 28,4f + + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + std r4,32(r10) + std r4,40(r10) + std r4,48(r10) + std r4,56(r10) + addi r10,r10,64 + + .align 4 +4: bf 29,2f + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + addi r10,r10,32 + + .align 4 +2: bf 30,1f + std r4,0(r10) + std r4,8(r10) + addi r10,r10,16 + + .align 4 +1: bf 31,L(huge_dcbz_aligned) + std r4,0(r10) + addi r10,r10,8 + +L(huge_dcbz_aligned): + /* Setup dcbz unroll offsets and count numbers. */ + srdi r8,r5,9 + clrldi r11,r5,55 + cmpldi cr6,r11,0 + li r9,128 + cmpdi r8,0 + beq L(huge_tail) + li r7,256 + li r6,384 + mtctr r8 + + .align 4 +L(huge_loop): + /* Sets 512 bytes to zero in each iteration, the loop unrolling shows + a throughput boost for large sizes (2048 bytes or higher). */ + dcbz 0,r10 + dcbz r9,r10 + dcbz r7,r10 + dcbz r6,r10 + addi r10,r10,512 + bdnz L(huge_loop) + + beqlr cr6 + +L(huge_tail): + srdi r6,r11,8 + srdi r7,r11,4 + clrldi r8,r11,4 + cmpldi cr6,r8,0 + mtocrf 0x01,r6 + + beq cr6,L(tail) + + /* We have 1~511 bytes remaining. */ + .align 4 +32: bf 31,16f + dcbz 0,r10 + dcbz r9,r10 + addi r10,r10,256 + + .align 4 +16: mtocrf 0x01,r7 + bf 28,8f + dcbz 0,r10 + addi r10,r10,128 + + .align 4 +8: bf 29,4f + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + std r4,32(r10) + std r4,40(r10) + std r4,48(r10) + std r4,56(r10) + addi r10,r10,64 + + .align 4 +4: bf 30,2f + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + addi r10,r10,32 + + .align 4 +2: bf 31,L(tail) + std r4,0(r10) + std r4,8(r10) + addi r10,r10,16 + .align 4 + + /* Remaining 1~15 bytes. */ +L(tail): + mtocrf 0x01,r8 + + .align +8: bf 28,4f + std r4,0(r10) + addi r10,r10,8 + + .align 4 +4: bf 29,2f + stw r4,0(r10) + addi r10,r10,4 + + .align 4 +2: bf 30,1f + sth r4,0(r10) + addi r10,r10,2 + + .align 4 +1: bflr 31 + stb r4,0(r10) + blr + + /* Handle short copies of 0~31 bytes. Best throughput is achieved + by just unrolling all operations. */ + .align 4 +L(write_LT_32): + cmpldi cr6,5,8 + mtocrf 0x01,r5 + ble cr6,L(write_LE_8) + + /* At least 9 bytes to go. */ + neg r8,r4 + andi. r0,r8,3 + cmpldi cr1,r5,16 + beq L(write_LT_32_aligned) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,r0 + subf r5,r0,r5 + +2: bf 30,1f + sth r4,0(r10) + addi r10,r10,2 + +1: bf 31,L(end_4bytes_alignment) + stb r4,0(r10) + addi r10,r10,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,r5,16 + mtocrf 0x01,r5 + +L(write_LT_32_aligned): + blt cr1,8f + + stw r4,0(r10) + stw r4,4(r10) + stw r4,8(r10) + stw r4,12(r10) + addi r10,r10,16 + +8: bf 28,L(tail4) + stw r4,0(r10) + stw r4,4(r10) + addi r10,r10,8 + + .align 4 + /* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + stw r4,0(r10) + bf 30,L(tail5) + sth r4,4(r10) + bflr 31 + stb r4,6(r10) + blr + + .align 4 + /* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + sth r4,0(r10) + bflr 31 + stb r4,2(r10) + blr + + .align 4 +L(tail5): + bflr 31 + stb r4,4(r10) + blr + + .align 4 +1: bflr 31 + stb r4,0(r10) + blr + + /* Handles copies of 0~8 bytes. */ + .align 4 +L(write_LE_8): + bne cr6,L(tail4) + + stw r4,0(r10) + stw r4,4(r10) + blr +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. */ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END (__bzero) +#ifndef __bzero +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/multiarch/Implies new file mode 100644 index 0000000000..1fc7b7cd39 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpcpy.S new file mode 100644 index 0000000000..955e738cee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpcpy.S @@ -0,0 +1,24 @@ +/* Optimized stpcpy implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STPCPY +#include <sysdeps/powerpc/powerpc64/power8/strcpy.S> + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpncpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpncpy.S new file mode 100644 index 0000000000..c14d984dd0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpncpy.S @@ -0,0 +1,24 @@ +/* Optimized stpncpy implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STPNCPY +#include <sysdeps/powerpc/powerpc64/power8/strncpy.S> + +weak_alias (__stpncpy, stpncpy) +libc_hidden_def (__stpncpy) +libc_hidden_builtin_def (stpncpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasecmp.S new file mode 100644 index 0000000000..88b17a6eb1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasecmp.S @@ -0,0 +1,457 @@ +/* Optimized strcasecmp implementation for PowerPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <locale-defines.h> + +/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */ + +#ifndef USE_AS_STRNCASECMP +# define __STRCASECMP __strcasecmp +# define STRCASECMP strcasecmp +#else +# define __STRCASECMP __strncasecmp +# define STRCASECMP strncasecmp +#endif +/* Convert 16 bytes to lowercase and compare */ +#define TOLOWER() \ + vaddubm v8, v4, v1; \ + vaddubm v7, v4, v3; \ + vcmpgtub v8, v8, v2; \ + vsel v4, v7, v4, v8; \ + vaddubm v8, v5, v1; \ + vaddubm v7, v5, v3; \ + vcmpgtub v8, v8, v2; \ + vsel v5, v7, v5, v8; \ + vcmpequb. v7, v5, v4; + +/* + * Get 16 bytes for unaligned case. + * reg1: Vector to hold next 16 bytes. + * reg2: Address to read from. + * reg3: Permute control vector. + * v8: Tmp vector used to mask unwanted bytes. + * v9: Tmp vector,0 when null is found on first 16 bytes + */ +#ifdef __LITTLE_ENDIAN__ +#define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vspltisb v8, -1; \ + vperm v8, v8, reg1, reg3; \ + vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, v9, reg1, reg3; +#else +#define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vspltisb v8, -1; \ + vperm v8, reg1, v8, reg3; \ + vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, reg1, v9, reg3; +#endif + +/* Check null in v4, v5 and convert to lower. */ +#define CHECKNULLANDCONVERT() \ + vcmpequb. v7, v0, v5; \ + beq cr6, 3f; \ + vcmpequb. v7, v0, v4; \ + beq cr6, 3f; \ + b L(null_found); \ + .align 4; \ +3: \ + TOLOWER() + +#ifdef _ARCH_PWR8 +# define VCLZD_V8_v7 vclzd v8, v7; +# define MFVRD_R3_V1 mfvrd r3, v1; +# define VSUBUDM_V9_V8 vsubudm v9, v9, v8; +# define VPOPCNTD_V8_V8 vpopcntd v8, v8; +# define VADDUQM_V7_V8 vadduqm v9, v7, v8; +#else +# define VCLZD_V8_v7 .long 0x11003fc2 +# define MFVRD_R3_V1 .long 0x7c230067 +# define VSUBUDM_V9_V8 .long 0x112944c0 +# define VPOPCNTD_V8_V8 .long 0x110047c3 +# define VADDUQM_V7_V8 .long 0x11274100 +#endif + + .machine power7 + +ENTRY (__STRCASECMP) +#ifdef USE_AS_STRNCASECMP + CALL_MCOUNT 3 +#else + CALL_MCOUNT 2 +#endif +#define rRTN r3 /* Return value */ +#define rSTR1 r10 /* 1st string */ +#define rSTR2 r4 /* 2nd string */ +#define rCHAR1 r6 /* Byte read from 1st string */ +#define rCHAR2 r7 /* Byte read from 2nd string */ +#define rADDR1 r8 /* Address of tolower(rCHAR1) */ +#define rADDR2 r12 /* Address of tolower(rCHAR2) */ +#define rLWR1 r8 /* Word tolower(rCHAR1) */ +#define rLWR2 r12 /* Word tolower(rCHAR2) */ +#define rTMP r9 +#define rLOC r11 /* Default locale address */ + + cmpd cr7, rRTN, rSTR2 + + /* Get locale address. */ + ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) + add rLOC, rTMP, __libc_tsd_LOCALE@tls + ld rLOC, 0(rLOC) + + mr rSTR1, rRTN + li rRTN, 0 + beqlr cr7 +#ifdef USE_AS_STRNCASECMP + cmpdi cr7, r5, 0 + beq cr7, L(retnull) + cmpdi cr7, r5, 16 + blt cr7, L(bytebybyte) +#endif + vspltisb v0, 0 + vspltisb v8, -1 + /* Check for null in initial characters. + Check max of 16 char depending on the alignment. + If null is present, proceed byte by byte. */ + lvx v4, 0, rSTR1 +#ifdef __LITTLE_ENDIAN__ + lvsr v10, 0, rSTR1 /* Compute mask. */ + vperm v9, v8, v4, v10 /* Mask bits that are not part of string. */ +#else + lvsl v10, 0, rSTR1 + vperm v9, v4, v8, v10 +#endif + vcmpequb. v9, v0, v9 /* Check for null bytes. */ + bne cr6, L(bytebybyte) + lvx v5, 0, rSTR2 + /* Calculate alignment. */ +#ifdef __LITTLE_ENDIAN__ + lvsr v6, 0, rSTR2 + vperm v9, v8, v5, v6 /* Mask bits that are not part of string. */ +#else + lvsl v6, 0, rSTR2 + vperm v9, v5, v8, v6 +#endif + vcmpequb. v9, v0, v9 /* Check for null bytes. */ + bne cr6, L(bytebybyte) + /* Check if locale has non ascii characters. */ + ld rTMP, 0(rLOC) + addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES + lwz rTMP, 0(r6) + cmpdi cr7, rTMP, 1 + beq cr7, L(bytebybyte) + + /* Load vector registers with values used for TOLOWER. */ + /* Load v1 = 0xbf, v2 = 0x19 v3 = 0x20 in each byte. */ + vspltisb v3, 2 + vspltisb v9, 4 + vsl v3, v3, v9 + vaddubm v1, v3, v3 + vnor v1, v1, v1 + vspltisb v2, 7 + vsububm v2, v3, v2 + + andi. rADDR1, rSTR1, 0xF + beq cr0, L(align) + addi r6, rSTR1, 16 + lvx v9, 0, r6 + /* Compute 16 bytes from previous two loads. */ +#ifdef __LITTLE_ENDIAN__ + vperm v4, v9, v4, v10 +#else + vperm v4, v4, v9, v10 +#endif +L(align): + andi. rADDR2, rSTR2, 0xF + beq cr0, L(align1) + addi r6, rSTR2, 16 + lvx v9, 0, r6 + /* Compute 16 bytes from previous two loads. */ +#ifdef __LITTLE_ENDIAN__ + vperm v5, v9, v5, v6 +#else + vperm v5, v5, v9, v6 +#endif +L(align1): + CHECKNULLANDCONVERT() + blt cr6, L(match) + b L(different) + .align 4 +L(match): + clrldi r6, rSTR1, 60 + subfic r7, r6, 16 +#ifdef USE_AS_STRNCASECMP + sub r5, r5, r7 +#endif + add rSTR1, rSTR1, r7 + add rSTR2, rSTR2, r7 + andi. rADDR2, rSTR2, 0xF + addi rSTR1, rSTR1, -16 + addi rSTR2, rSTR2, -16 + beq cr0, L(aligned) +#ifdef __LITTLE_ENDIAN__ + lvsr v6, 0, rSTR2 +#else + lvsl v6, 0, rSTR2 +#endif + /* There are 2 loops depending on the input alignment. + Each loop gets 16 bytes from s1 and s2, check for null, + convert to lowercase and compare. Loop till difference + or null occurs. */ +L(s1_align): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#ifdef USE_AS_STRNCASECMP + cmpdi cr7, r5, 16 + blt cr7, L(bytebybyte) + addi r5, r5, -16 +#endif + lvx v4, 0, rSTR1 + GET16BYTES(v5, rSTR2, v6) + CHECKNULLANDCONVERT() + blt cr6, L(s1_align) + b L(different) + .align 4 +L(aligned): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#ifdef USE_AS_STRNCASECMP + cmpdi cr7, r5, 16 + blt cr7, L(bytebybyte) + addi r5, r5, -16 +#endif + lvx v4, 0, rSTR1 + lvx v5, 0, rSTR2 + CHECKNULLANDCONVERT() + blt cr6, L(aligned) + + /* Calculate and return the difference. */ +L(different): + vaddubm v1, v3, v3 + vcmpequb v7, v0, v7 +#ifdef __LITTLE_ENDIAN__ + /* Count trailing zero. */ + vspltisb v8, -1 + VADDUQM_V7_V8 + vandc v8, v9, v7 + VPOPCNTD_V8_V8 + vspltb v6, v8, 15 + vcmpequb. v6, v6, v1 + blt cr6, L(shift8) +#else + /* Count leading zero. */ + VCLZD_V8_v7 + vspltb v6, v8, 7 + vcmpequb. v6, v6, v1 + blt cr6, L(shift8) + vsro v8, v8, v1 +#endif + b L(skipsum) + .align 4 +L(shift8): + vsumsws v8, v8, v0 +L(skipsum): +#ifdef __LITTLE_ENDIAN__ + /* Shift registers based on leading zero count. */ + vsro v6, v5, v8 + vsro v7, v4, v8 + /* Merge and move to GPR. */ + vmrglb v6, v6, v7 + vslo v1, v6, v1 + MFVRD_R3_V1 + /* Place the characters that are different in first position. */ + sldi rSTR2, rRTN, 56 + srdi rSTR2, rSTR2, 56 + sldi rSTR1, rRTN, 48 + srdi rSTR1, rSTR1, 56 +#else + vslo v6, v5, v8 + vslo v7, v4, v8 + vmrghb v1, v6, v7 + MFVRD_R3_V1 + srdi rSTR2, rRTN, 48 + sldi rSTR2, rSTR2, 56 + srdi rSTR2, rSTR2, 56 + srdi rSTR1, rRTN, 56 +#endif + subf rRTN, rSTR1, rSTR2 + extsw rRTN, rRTN + blr + + .align 4 + /* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of junk beyond + the end of the strings... */ +L(null_found): + vaddubm v10, v3, v3 +#ifdef __LITTLE_ENDIAN__ + /* Count trailing zero. */ + vspltisb v8, -1 + VADDUQM_V7_V8 + vandc v8, v9, v7 + VPOPCNTD_V8_V8 + vspltb v6, v8, 15 + vcmpequb. v6, v6, v10 + blt cr6, L(shift_8) +#else + /* Count leading zero. */ + VCLZD_V8_v7 + vspltb v6, v8, 7 + vcmpequb. v6, v6, v10 + blt cr6, L(shift_8) + vsro v8, v8, v10 +#endif + b L(skipsum1) + .align 4 +L(shift_8): + vsumsws v8, v8, v0 +L(skipsum1): + /* Calculate shift count based on count of zero. */ + vspltisb v10, 7 + vslb v10, v10, v10 + vsldoi v9, v0, v10, 1 + VSUBUDM_V9_V8 + vspltisb v8, 8 + vsldoi v8, v0, v8, 1 + VSUBUDM_V9_V8 + /* Shift and remove junk after null character. */ +#ifdef __LITTLE_ENDIAN__ + vslo v5, v5, v9 + vslo v4, v4, v9 +#else + vsro v5, v5, v9 + vsro v4, v4, v9 +#endif + /* Convert and compare 16 bytes. */ + TOLOWER() + blt cr6, L(retnull) + b L(different) + .align 4 +L(retnull): + li rRTN, 0 + blr + .align 4 +L(bytebybyte): + /* Unrolling loop for POWER: loads are done with 'lbz' plus + offset and string descriptors are only updated in the end + of loop unrolling. */ + ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ +#ifdef USE_AS_STRNCASECMP + rldicl rTMP, r5, 62, 2 + cmpdi cr7, rTMP, 0 + beq cr7, L(lessthan4) + mtctr rTMP +#endif +L(loop): + cmpdi rCHAR1, 0 /* *s1 == '\0' ? */ + sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ + sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ + lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ + lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ + cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */ + crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 != '\0') || (r == 1) */ + beq cr1, L(done) + lbz rCHAR1, 1(rSTR1) + lbz rCHAR2, 1(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 2(rSTR1) + lbz rCHAR2, 2(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 3(rSTR1) + lbz rCHAR2, 3(rSTR2) + cmpdi rCHAR1, 0 + /* Increment both string descriptors */ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ +#ifdef USE_AS_STRNCASECMP + bdnz L(loop) +#else + b L(loop) +#endif +#ifdef USE_AS_STRNCASECMP +L(lessthan4): + clrldi r5, r5, 62 + cmpdi cr7, r5, 0 + beq cr7, L(retnull) + mtctr r5 +L(loop1): + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + addi rSTR1, rSTR1, 1 + addi rSTR2, rSTR2, 1 + lbz rCHAR1, 0(rSTR1) + lbz rCHAR2, 0(rSTR2) + bdnz L(loop1) +#endif +L(done): + subf r0, rLWR2, rLWR1 + extsw rRTN, r0 + blr +END (__STRCASECMP) + +weak_alias (__STRCASECMP, STRCASECMP) +libc_hidden_builtin_def (__STRCASECMP) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c new file mode 100644 index 0000000000..0e746b7718 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c @@ -0,0 +1,29 @@ +/* Optimized strcasestr implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRCASESTR __strcasestr_ppc +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(__name) + +#undef weak_alias +#define weak_alias(a,b) +extern __typeof (strcasestr) __strcasestr_ppc attribute_hidden; + +#include <string/strcasestr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr.S new file mode 100644 index 0000000000..6ac6572f3b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr.S @@ -0,0 +1,538 @@ +/* Optimized strcasestr implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <locale-defines.h> + +/* Char * [r3] strcasestr (char *s [r3], char * pat[r4]) */ + +/* The performance gain is obtained by comparing 16 bytes. */ + +/* When the first char of r4 is hit ITERATIONS times in r3 + fallback to default. */ +#define ITERATIONS 64 + +#ifndef STRCASESTR +# define STRCASESTR __strcasestr +#endif + +#ifndef STRLEN +/* For builds without IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRLEN __GI_strlen +# else +# define STRLEN strlen +# endif +#endif + +#ifndef STRNLEN +/* For builds without IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRNLEN __GI_strnlen +# else +# define STRNLEN __strnlen +# endif +#endif + +#ifndef STRCHR +# ifdef SHARED +# define STRCHR __GI_strchr +# else +# define STRCHR strchr +# endif +#endif + +/* Convert 16 bytes of v4 and reg to lowercase and compare. */ +#define TOLOWER(reg) \ + vcmpgtub v6, v4, v1; \ + vcmpgtub v7, v2, v4; \ + vand v8, v7, v6; \ + vand v8, v8, v3; \ + vor v4, v8, v4; \ + vcmpgtub v6, reg, v1; \ + vcmpgtub v7, v2, reg; \ + vand v8, v7, v6; \ + vand v8, v8, v3; \ + vor reg, v8, reg; \ + vcmpequb. v6, reg, v4; + +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#ifdef _ARCH_PWR8 +#define VCLZD_V8_v7 vclzd v8, v7; +#else +#define VCLZD_V8_v7 .long 0x11003fc2 +#endif + +#define FRAMESIZE (FRAME_MIN_SIZE+48) +/* TODO: change this to .machine power8 when the minimum required binutils + allows it. */ + .machine power7 +EALIGN (STRCASESTR, 4, 0) + CALL_MCOUNT 2 + mflr r0 /* Load link register LR to r0. */ + std r31, -8(r1) /* Save callers register r31. */ + std r30, -16(r1) /* Save callers register r30. */ + std r29, -24(r1) /* Save callers register r29. */ + std r28, -32(r1) /* Save callers register r28. */ + std r27, -40(r1) /* Save callers register r27. */ + std r0, 16(r1) /* Store the link register. */ + cfi_offset(r31, -8) + cfi_offset(r30, -16) + cfi_offset(r29, -24) + cfi_offset(r28, -32) + cfi_offset(r27, -40) + cfi_offset(lr, 16) + stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */ + cfi_adjust_cfa_offset(FRAMESIZE) + + dcbt 0, r3 + dcbt 0, r4 + cmpdi cr7, r3, 0 /* Input validation. */ + beq cr7, L(retnull) + cmpdi cr7, r4, 0 + beq cr7, L(retnull) + + mr r29, r3 + mr r30, r4 + /* Load first byte from r4 and check if its null. */ + lbz r6, 0(r4) + cmpdi cr7, r6, 0 + beq cr7, L(ret_r3) + + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r9, r10, __libc_tsd_LOCALE@tls + ld r9, 0(r9) + ld r9, LOCALE_CTYPE_TOUPPER(r9) + sldi r10, r6, 2 /* Convert to upper case. */ + lwzx r28, r9, r10 + + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r11, r10, __libc_tsd_LOCALE@tls + ld r11, 0(r11) + ld r11, LOCALE_CTYPE_TOLOWER(r11) + sldi r10, r6, 2 /* Convert to lower case. */ + lwzx r27, r11, r10 + + /* Check if the first char is present. */ + mr r4, r27 + bl STRCHR + nop + mr r5, r3 + mr r3, r29 + mr r29, r5 + mr r4, r28 + bl STRCHR + nop + cmpdi cr7, r29, 0 + beq cr7, L(firstpos) + cmpdi cr7, r3, 0 + beq cr7, L(skipcheck) + cmpw cr7, r3, r29 + ble cr7, L(firstpos) + /* Move r3 to the first occurence. */ +L(skipcheck): + mr r3, r29 +L(firstpos): + mr r29, r3 + + sldi r9, r27, 8 + or r28, r9, r28 + /* Reg r27 is used to count the number of iterations. */ + li r27, 0 + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) + + /* Find the length of pattern. */ + mr r3, r30 + bl STRLEN + nop + + cmpdi cr7, r3, 0 /* If search str is null. */ + beq cr7, L(ret_r3) + + mr r31, r3 + mr r4, r3 + mr r3, r29 + bl STRNLEN + nop + + cmpd cr7, r3, r31 /* If len(r3) < len(r4). */ + blt cr7, L(retnull) + + mr r3, r29 + + /* Locales not matching ASCII for single bytes. */ + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r9, r10, __libc_tsd_LOCALE@tls + ld r9, 0(r9) + ld r7, 0(r9) + addi r7, r7, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES + lwz r8, 0(r7) + cmpdi cr7, r8, 1 + beq cr7, L(bytebybyte) + + /* If len(r4) < 16 handle byte by byte. */ + /* For shorter strings we will not use vector registers. */ + cmpdi cr7, r31, 16 + blt cr7, L(bytebybyte) + + /* Comparison values used for TOLOWER. */ + /* Load v1 = 64('A' - 1), v2 = 91('Z' + 1), v3 = 32 in each byte. */ + vspltish v0, 0 + vspltisb v5, 2 + vspltisb v4, 4 + vsl v3, v5, v4 + vaddubm v1, v3, v3 + vspltisb v5, 15 + vaddubm v2, v5, v5 + vaddubm v2, v1, v2 + vspltisb v4, -3 + vaddubm v2, v2, v4 + + /* + 1. Load 16 bytes from r3 and r4 + 2. Check if there is null, If yes, proceed byte by byte path. + 3. Else,Convert both to lowercase and compare. + 4. If they are same proceed to 1. + 5. If they dont match, find if first char of r4 is present in the + loaded 16 byte of r3. + 6. If yes, move position, load next 16 bytes of r3 and proceed to 2. + */ + + mr r8, r3 /* Save r3 for future use. */ + mr r4, r30 /* Restore r4. */ + clrldi r10, r4, 60 + lvx v5, 0, r4 /* Load 16 bytes from r4. */ + cmpdi cr7, r10, 0 + beq cr7, L(begin2) + /* If r4 is unaligned, load another 16 bytes. */ +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r4 +#else + lvsl v7, 0, r4 +#endif + addi r5, r4, 16 + lvx v9, 0, r5 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v9, v5, v7 +#else + vperm v5, v5, v9, v7 +#endif +L(begin2): + lvx v4, 0, r3 + vcmpequb. v7, v0, v4 /* Check for null. */ + beq cr6, L(nullchk6) + b L(trailcheck) + + .align 4 +L(nullchk6): + clrldi r10, r3, 60 + cmpdi cr7, r10, 0 + beq cr7, L(next16) +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r3 +#else + lvsl v7, 0, r3 +#endif + addi r5, r3, 16 + /* If r3 is unaligned, load another 16 bytes. */ + lvx v10, 0, r5 +#ifdef __LITTLE_ENDIAN__ + vperm v4, v10, v4, v7 +#else + vperm v4, v4, v10, v7 +#endif +L(next16): + vcmpequb. v6, v0, v5 /* Check for null. */ + beq cr6, L(nullchk) + b L(trailcheck) + + .align 4 +L(nullchk): + vcmpequb. v6, v0, v4 + beq cr6, L(nullchk1) + b L(retnull) + + .align 4 +L(nullchk1): + /* Convert both v3 and v4 to lower. */ + TOLOWER(v5) + /* If both are same, branch to match. */ + blt cr6, L(match) + /* Find if the first char is present in next 15 bytes. */ +#ifdef __LITTLE_ENDIAN__ + vspltb v6, v5, 15 + vsldoi v7, v0, v4, 15 +#else + vspltb v6, v5, 0 + vspltisb v7, 8 + vslo v7, v4, v7 +#endif + vcmpequb v7, v6, v7 + vcmpequb. v6, v0, v7 + /* Shift r3 by 16 bytes and proceed. */ + blt cr6, L(shift16) + VCLZD_V8_v7 +#ifdef __LITTLE_ENDIAN__ + vspltb v6, v8, 15 +#else + vspltb v6, v8, 7 +#endif + vcmpequb. v6, v6, v1 + /* Shift r3 by 8 bytes and proceed. */ + blt cr6, L(shift8) + b L(begin) + + .align 4 +L(match): + /* There is a match of 16 bytes, check next bytes. */ + cmpdi cr7, r31, 16 + mr r29, r3 + beq cr7, L(ret_r3) + +L(secondmatch): + addi r3, r3, 16 + addi r4, r4, 16 + /* Load next 16 bytes of r3 and r4 and compare. */ + clrldi r10, r4, 60 + cmpdi cr7, r10, 0 + beq cr7, L(nextload) + /* Handle unaligned case. */ + vor v6, v9, v9 + vcmpequb. v7, v0, v6 + beq cr6, L(nullchk2) + b L(trailcheck) + + .align 4 +L(nullchk2): +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r4 +#else + lvsl v7, 0, r4 +#endif + addi r5, r4, 16 + /* If r4 is unaligned, load another 16 bytes. */ + lvx v9, 0, r5 +#ifdef __LITTLE_ENDIAN__ + vperm v11, v9, v6, v7 +#else + vperm v11, v6, v9, v7 +#endif + b L(compare) + + .align 4 +L(nextload): + lvx v11, 0, r4 +L(compare): + vcmpequb. v7, v0, v11 + beq cr6, L(nullchk3) + b L(trailcheck) + + .align 4 +L(nullchk3): + clrldi r10, r3, 60 + cmpdi cr7, r10, 0 + beq cr7, L(nextload1) + /* Handle unaligned case. */ + vor v4, v10, v10 + vcmpequb. v7, v0, v4 + beq cr6, L(nullchk4) + b L(retnull) + + .align 4 +L(nullchk4): +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r3 +#else + lvsl v7, 0, r3 +#endif + addi r5, r3, 16 + /* If r3 is unaligned, load another 16 bytes. */ + lvx v10, 0, r5 +#ifdef __LITTLE_ENDIAN__ + vperm v4, v10, v4, v7 +#else + vperm v4, v4, v10, v7 +#endif + b L(compare1) + + .align 4 +L(nextload1): + lvx v4, 0, r3 +L(compare1): + vcmpequb. v7, v0, v4 + beq cr6, L(nullchk5) + b L(retnull) + + .align 4 +L(nullchk5): + /* Convert both v3 and v4 to lower. */ + TOLOWER(v11) + /* If both are same, branch to secondmatch. */ + blt cr6, L(secondmatch) + /* Continue the search. */ + b L(begin) + + .align 4 +L(trailcheck): + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r11, r10, __libc_tsd_LOCALE@tls + ld r11, 0(r11) + ld r11, LOCALE_CTYPE_TOLOWER(r11) +L(loop2): + lbz r5, 0(r3) /* Load byte from r3. */ + lbz r6, 0(r4) /* Load next byte from r4. */ + cmpdi cr7, r6, 0 /* Is it null? */ + beq cr7, L(updater3) + cmpdi cr7, r5, 0 /* Is it null? */ + beq cr7, L(retnull) /* If yes, return. */ + addi r3, r3, 1 + addi r4, r4, 1 /* Increment r4. */ + sldi r10, r5, 2 /* Convert to lower case. */ + lwzx r10, r11, r10 + sldi r7, r6, 2 /* Convert to lower case. */ + lwzx r7, r11, r7 + cmpw cr7, r7, r10 /* Compare with byte from r4. */ + bne cr7, L(begin) + b L(loop2) + + .align 4 +L(shift8): + addi r8, r8, 7 + b L(begin) + .align 4 +L(shift16): + addi r8, r8, 15 + .align 4 +L(begin): + addi r8, r8, 1 + mr r3, r8 + /* When our iterations exceed ITERATIONS,fall back to default. */ + addi r27, r27, 1 + cmpdi cr7, r27, ITERATIONS + beq cr7, L(default) + mr r4, r30 /* Restore r4. */ + b L(begin2) + + /* Handling byte by byte. */ + .align 4 +L(loop1): + mr r3, r8 + addi r27, r27, 1 + cmpdi cr7, r27, ITERATIONS + beq cr7, L(default) + mr r29, r8 + srdi r4, r28, 8 + /* Check if the first char is present. */ + bl STRCHR + nop + mr r5, r3 + mr r3, r29 + mr r29, r5 + sldi r4, r28, 56 + srdi r4, r4, 56 + bl STRCHR + nop + cmpdi cr7, r29, 0 + beq cr7, L(nextpos) + cmpdi cr7, r3, 0 + beq cr7, L(skipcheck1) + cmpw cr7, r3, r29 + ble cr7, L(nextpos) + /* Move r3 to first occurence. */ +L(skipcheck1): + mr r3, r29 +L(nextpos): + mr r29, r3 + cmpdi cr7, r3, 0 + ble cr7, L(retnull) +L(bytebybyte): + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r11, r10, __libc_tsd_LOCALE@tls + ld r11, 0(r11) + ld r11, LOCALE_CTYPE_TOLOWER(r11) + mr r4, r30 /* Restore r4. */ + mr r8, r3 /* Save r3. */ + addi r8, r8, 1 + +L(loop): + addi r3, r3, 1 + lbz r5, 0(r3) /* Load byte from r3. */ + addi r4, r4, 1 /* Increment r4. */ + lbz r6, 0(r4) /* Load next byte from r4. */ + cmpdi cr7, r6, 0 /* Is it null? */ + beq cr7, L(updater3) + cmpdi cr7, r5, 0 /* Is it null? */ + beq cr7, L(retnull) /* If yes, return. */ + sldi r10, r5, 2 /* Convert to lower case. */ + lwzx r10, r11, r10 + sldi r7, r6, 2 /* Convert to lower case. */ + lwzx r7, r11, r7 + cmpw cr7, r7, r10 /* Compare with byte from r4. */ + bne cr7, L(loop1) + b L(loop) + + /* Handling return values. */ + .align 4 +L(updater3): + subf r3, r31, r3 /* Reduce r31 (len of r4) from r3. */ + b L(end) + + .align 4 +L(ret_r3): + mr r3, r29 /* Return point of match. */ + b L(end) + + .align 4 +L(retnull): + li r3, 0 /* Substring was not found. */ + b L(end) + + .align 4 +L(default): + mr r4, r30 + bl __strcasestr_ppc + nop + + .align 4 +L(end): + addi r1, r1, FRAMESIZE /* Restore stack pointer. */ + cfi_adjust_cfa_offset(-FRAMESIZE) + ld r0, 16(r1) /* Restore the saved link register. */ + ld r27, -40(r1) + ld r28, -32(r1) + ld r29, -24(r1) /* Restore callers save register r29. */ + ld r30, -16(r1) /* Restore callers save register r30. */ + ld r31, -8(r1) /* Restore callers save register r31. */ + cfi_restore(lr) + cfi_restore(r27) + cfi_restore(r28) + cfi_restore(r29) + cfi_restore(r30) + cfi_restore(r31) + mtlr r0 /* Branch to link register. */ + blr +END (STRCASESTR) + +weak_alias (__strcasestr, strcasestr) +libc_hidden_def (__strcasestr) +libc_hidden_builtin_def (strcasestr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchr.S new file mode 100644 index 0000000000..e0c185c162 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchr.S @@ -0,0 +1,377 @@ +/* Optimized strchr implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifdef USE_AS_STRCHRNUL +# ifndef STRCHRNUL +# define FUNC_NAME __strchrnul +# else +# define FUNC_NAME STRCHRNUL +# endif +#else +# ifndef STRCHR +# define FUNC_NAME strchr +# else +# define FUNC_NAME STRCHR +# endif +#endif /* !USE_AS_STRCHRNUL */ + +/* int [r3] strchr (char *s [r3], int c [r4]) */ +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) +/* TODO: change this to .machine power8 when the minimum required binutils + allows it. */ + .machine power7 +ENTRY (FUNC_NAME) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r9,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r9,r4 + cmpb r7,r9,r0 + or r5,r10,r11 + or r9,r6,r7 + or r12,r5,r9 + cmpdi cr7,r12,0 + beq cr7,L(vector) + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 +#ifdef USE_AS_STRCHRNUL + mr r5, r9 +#endif + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done): +#ifdef USE_AS_STRCHRNUL + mr r10, r5 +#endif +#ifdef __LITTLE_ENDIAN__ + addi r3,r10,-1 + andc r3,r3,r10 + popcntd r0,r3 +# ifndef USE_AS_STRCHRNUL + addi r4,r11,-1 + andc r4,r4,r11 + cmpld cr7,r3,r4 + bgt cr7,L(no_match) +# endif +#else + cntlzd r0,r10 /* Count leading zeros before c matches. */ +# ifndef USE_AS_STRCHRNUL + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +# endif +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + blr + + /* Check the first 32B in GPR's and move to vectorized loop. */ + .p2align 5 +L(vector): + addi r3, r8, 8 + andi. r10, r3, 31 + bne cr0, L(loop) + vspltisb v0, 0 + /* Precompute vbpermq constant. */ + vspltisb v10, 3 + lvsl v11, r0, r0 + vslb v10, v11, v10 + MTVRD(v1,r4) + li r5, 16 + vspltb v1, v1, 7 + /* Compare 32 bytes in each loop. */ +L(continue): + lvx v4, 0, r3 + lvx v5, r3, r5 + vcmpequb v2, v0, v4 + vcmpequb v3, v0, v5 + vcmpequb v6, v1, v4 + vcmpequb v7, v1, v5 + vor v8, v2, v3 + vor v9, v6, v7 + vor v11, v8, v9 + vcmpequb. v11, v0, v11 + addi r3, r3, 32 + blt cr6, L(continue) + /* One (or both) of the quadwords contains a c/null byte. */ + addi r3, r3, -32 +#ifndef USE_AS_STRCHRNUL + vcmpequb. v11, v0, v9 + blt cr6, L(no_match) +#endif + /* Permute the first bit of each byte into bits 48-63. */ + VBPERMQ(v2, v2, v10) + VBPERMQ(v3, v3, v10) + VBPERMQ(v6, v6, v10) + VBPERMQ(v7, v7, v10) + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v3, v3, v3, 2 + vsldoi v7, v7, v7, 2 +#else + vsldoi v2, v2, v2, 6 + vsldoi v3, v3, v3, 4 + vsldoi v6, v6, v6, 6 + vsldoi v7, v7, v7, 4 +#endif + + /* Merge the results and move to a GPR. */ + vor v1, v3, v2 + vor v2, v6, v7 + vor v4, v1, v2 + MFVRD(r5, v4) +#ifdef __LITTLE_ENDIAN__ + addi r6, r5, -1 + andc r6, r6, r5 + popcntd r6, r6 +#else + cntlzd r6, r5 /* Count leading zeros before the match. */ +#endif + add r3, r3, r6 /* Compute final length. */ + /* Return NULL if null found before c. */ +#ifndef USE_AS_STRCHRNUL + lbz r4, 0(r3) + cmpdi cr7, r4, 0 + beq cr7, L(no_match) +#endif + blr + +#ifndef USE_AS_STRCHRNUL + .align 4 +L(no_match): + li r3,0 + blr +#endif + +/* We are here because strchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(vector1) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr + .p2align 5 +L(vector1): + addi r3, r8, 8 + andi. r10, r3, 31 + bne cr0, L(loop_null) + vspltisb v8, -1 + vspltisb v0, 0 + vspltisb v10, 3 + lvsl v11, r0, r0 + vslb v10, v11, v10 + li r5, 16 +L(continue1): + lvx v4, 0, r3 + lvx v5, r3, r5 + vcmpequb v2, v0, v4 + vcmpequb v3, v0, v5 + vor v8, v2, v3 + vcmpequb. v11, v0, v8 + addi r3, r3, 32 + blt cr6, L(continue1) + addi r3, r3, -32 +L(end1): + VBPERMQ(v2, v2, v10) + VBPERMQ(v3, v3, v10) + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v3, v3, v3, 2 +#else + vsldoi v2, v2, v2, 6 + vsldoi v3, v3, v3, 4 +#endif + + /* Merge the results and move to a GPR. */ + vor v4, v3, v2 + MFVRD(r5, v4) +#ifdef __LITTLE_ENDIAN__ + addi r6, r5, -1 + andc r6, r6, r5 + popcntd r6, r6 +#else + cntlzd r6, r5 /* Count leading zeros before the match. */ +#endif + add r3, r3, r6 /* Compute final length. */ + blr +END (FUNC_NAME) + +#ifndef USE_AS_STRCHRNUL +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchrnul.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchrnul.S new file mode 100644 index 0000000000..3bf4b275dd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchrnul.S @@ -0,0 +1,23 @@ +/* Optimized strchrnul implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STRCHRNUL 1 +#include <sysdeps/powerpc/powerpc64/power8/strchr.S> + +weak_alias (__strchrnul,strchrnul) +libc_hidden_builtin_def (__strchrnul) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcmp.S new file mode 100644 index 0000000000..770484f1e1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcmp.S @@ -0,0 +1,247 @@ +/* Optimized strcmp implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRCMP +# define STRCMP strcmp +#endif + +/* Implements the function + + size_t [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment. Although recent powerpc64 uses + 64K as default, the page cross handling assumes minimum page size of + 4k. */ + +EALIGN (STRCMP, 4, 0) + li r0,0 + + /* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using + the code: + + (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) + + with PAGE_SIZE being 4096 and ITER_SIZE begin 16. */ + + rldicl r7,r3,0,52 + rldicl r9,r4,0,52 + cmpldi cr7,r7,4096-16 + bgt cr7,L(pagecross_check) + cmpldi cr5,r9,4096-16 + bgt cr5,L(pagecross_check) + + /* For short string up to 16 bytes, load both s1 and s2 using + unaligned dwords and compare. */ + ld r8,0(r3) + ld r10,0(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + ld r8,8(r3) + ld r10,8(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + addi r7,r3,16 + addi r4,r4,16 + +L(align_8b): + /* Now it has checked for first 16 bytes, align source1 to doubleword + and adjust source2 address. */ + rldicl r9,r7,0,61 /* source1 alignment to doubleword */ + subf r4,r9,r4 /* Adjust source2 address based on source1 + alignment. */ + rldicr r7,r7,0,60 /* Align source1 to doubleword. */ + + /* At this point, source1 alignment is 0 and source2 alignment is + between 0 and 7. Check is source2 alignment is 0, meaning both + sources have the same alignment. */ + andi. r9,r4,0x7 + bne cr0,L(loop_diff_align) + + /* If both source1 and source2 are doubleword aligned, there is no + need for page boundary cross checks. */ + + ld r8,0(r7) + ld r10,0(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + .align 4 +L(loop_equal_align): + ld r8,8(r7) + ld r10,8(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + ld r8,16(r7) + ld r10,16(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + ldu r8,24(r7) + ldu r10,24(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + b L(loop_equal_align) + + /* A zero byte was found in r8 (s1 dword), r9 contains the cmpb + result and r10 the dword from s2. To code isolate the byte + up to end (including the '\0'), masking with 0xFF the remaining + ones: + + #if __LITTLE_ENDIAN__ + (__builtin_ffsl (x) - 1) = counting trailing zero bits + r9 = (__builtin_ffsl (r9) - 1) + 8; + r9 = -1UL << r9 + #else + r9 = __builtin_clzl (r9) + 8; + r9 = -1UL >> r9 + #endif + r8 = r8 | r9 + r10 = r10 | r9 */ + +#ifdef __LITTLE_ENDIAN__ + nor r9,r9,r9 +L(different_nocmpb): + neg r3,r9 + and r9,r9,r3 + cntlzd r9,r9 + subfic r9,r9,63 +#else + not r9,r9 +L(different_nocmpb): + cntlzd r9,r9 + subfic r9,r9,56 +#endif + srd r3,r8,r9 + srd r10,r10,r9 + rldicl r10,r10,0,56 + rldicl r3,r3,0,56 + subf r3,r10,r3 + extsw r3,r3 + blr + + .align 4 +L(pagecross_check): + subfic r9,r9,4096 + subfic r7,r7,4096 + cmpld cr7,r7,r9 + bge cr7,L(pagecross) + mr r7,r9 + + /* If unaligned 16 bytes reads across a 4K page boundary, it uses + a simple byte a byte comparison until the page alignment for s1 + is reached. */ +L(pagecross): + add r7,r3,r7 + subf r9,r3,r7 + mtctr r9 + + .align 4 +L(pagecross_loop): + /* Loads a byte from s1 and s2, compare if *s1 is equal to *s2 + and if *s1 is '\0'. */ + lbz r9,0(r3) + lbz r10,0(r4) + addi r3,r3,1 + addi r4,r4,1 + cmplw cr7,r9,r10 + cmpdi cr5,r9,r0 + bne cr7,L(pagecross_ne) + beq cr5,L(pagecross_nullfound) + bdnz L(pagecross_loop) + b L(align_8b) + + .align 4 + /* The unaligned read of source2 will cross a 4K page boundary, + and the different byte or NULL maybe be in the remaining page + bytes. Since it can not use the unaligned load, the algorithm + reads and compares 8 bytes to keep source1 doubleword aligned. */ +L(check_source2_byte): + li r9,8 + mtctr r9 + + .align 4 +L(check_source2_byte_loop): + lbz r9,0(r7) + lbz r10,0(r4) + addi r7,r7,1 + addi r4,r4,1 + cmplw cr7,r9,10 + cmpdi r5,r9,0 + bne cr7,L(pagecross_ne) + beq cr5,L(pagecross_nullfound) + bdnz L(check_source2_byte_loop) + + /* If source2 is unaligned to doubleword, the code needs to check + on each interation if the unaligned doubleword access will cross + a 4k page boundary. */ + .align 5 +L(loop_unaligned): + ld r8,0(r7) + ld r10,0(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + addi r7,r7,8 + addi r4,r4,8 + +L(loop_diff_align): + /* Check if [src2]+8 cross a 4k page boundary: + + srcin2 % PAGE_SIZE > (PAGE_SIZE - 8) + + with PAGE_SIZE being 4096. */ + rldicl r9,r4,0,52 + cmpldi cr7,r9,4088 + ble cr7,L(loop_unaligned) + b L(check_source2_byte) + + .align 4 +L(pagecross_ne): + extsw r3,r9 + mr r9,r10 +L(pagecross_retdiff): + subf r9,r9,r3 + extsw r3,r9 + blr + + .align 4 +L(pagecross_nullfound): + li r3,0 + b L(pagecross_retdiff) +END (STRCMP) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcpy.S new file mode 100644 index 0000000000..7f2cee4b1b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcpy.S @@ -0,0 +1,270 @@ +/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifdef USE_AS_STPCPY +# ifndef STPCPY +# define FUNC_NAME __stpcpy +# else +# define FUNC_NAME STPCPY +# endif +#else +# ifndef STRCPY +# define FUNC_NAME strcpy +# else +# define FUNC_NAME STRCPY +# endif +#endif /* !USE_AS_STPCPY */ + +/* Implements the function + + char * [r3] strcpy (char *dest [r3], const char *src [r4]) + + or + + char * [r3] stpcpy (char *dest [r3], const char *src [r4]) + + if USE_AS_STPCPY is defined. + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment. Although recent powerpc64 uses + 64K as default, the page cross handling assumes minimum page size of + 4k. */ + + .machine power7 +EALIGN (FUNC_NAME, 4, 0) + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + /* Check if the [src]+15 will cross a 4K page by checking if the bit + indicating the page size changes. Basically: + + uint64_t srcin = (uint64_t)src; + uint64_t ob = srcin & 4096UL; + uint64_t nb = (srcin+15UL) & 4096UL; + if (ob ^ nb) + goto pagecross; */ + + addi r9,r4,15 + xor r9,r9,r4 + rlwinm. r9,r9,0,19,19 + bne L(pagecross) + + /* For short string (less than 16 bytes), just calculate its size as + strlen and issues a memcpy if null is found. */ + mr r7,r4 + ld r12,0(r7) /* Load doubleword from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + ldu r8,8(r7) + cmpb r10,r8,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + b L(loop_before) + + .align 4 +L(pagecross): + clrrdi r7,r4,3 /* Align the address to doubleword boundary. */ + rlwinm r6,r4,3,26,28 /* Calculate padding. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + ld r12,0(r7) /* Load doubleword from memory. */ +#ifdef __LITTLE_ENDIAN__ + sld r5,r5,r6 +#else + srd r5,r5,r6 /* MASK = MASK >> padding. */ +#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + ldu r6,8(r7) + cmpb r10,r6,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + ld r12,0(r7) + cmpb r10,r12,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + ldu r6,8(r7) + cmpb r10,r6,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + /* We checked for 24 - x bytes, with x being the source alignment + (0 <= x <= 16), and no zero has been found. Start the loop + copy with doubleword aligned address. */ + mr r7,r4 + ld r12, 0(r7) + ldu r8, 8(r7) + +L(loop_before): + /* Save the two doublewords readed from source and align the source + to 16 bytes for the loop. */ + mr r11,r3 + std r12,0(r11) + std r8,8(r11) + addi r11,r11,16 + rldicl r9,r4,0,60 + subf r7,r9,r7 + subf r11,r9,r11 + b L(loop_start) + + .align 5 +L(loop): + std r12, 0(r11) + std r6, 8(r11) + addi r11,r11,16 +L(loop_start): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + + ld r12, 8(r7) + ldu r6, 16(r7) + cmpb r10,r12,r0 + cmpb r9,r6,r0 + or r8,r9,r10 /* Merge everything in one doubleword. */ + cmpdi cr7,r8,0 + beq cr7,L(loop) + + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + addi r4,r7,-8 + cmpdi cr6,r10,0 + addi r7,r7,-8 + bne cr6,L(done2) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r9 + addi r7,r7,8 + b L(done2) + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the length. */ +L(done): + mr r11,r3 +L(done2): +#ifdef __LITTLE_ENDIAN__ + addi r9, r10, -1 /* Form a mask from trailing zeros. */ + andc r9, r9, r10 + popcntd r6, r9 /* Count the bits in the mask. */ +#else + cntlzd r6,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r4,r7 + srdi r6,r6,3 /* Convert leading/trailing zeros to bytes. */ + add r8,r5,r6 /* Compute final length. */ +#ifdef USE_AS_STPCPY + /* stpcpy returns the dest address plus the size not counting the + final '\0'. */ + add r3,r11,r8 +#endif + addi r8,r8,1 /* Final '/0'. */ + + cmpldi cr6,r8,8 + mtocrf 0x01,r8 + ble cr6,L(copy_LE_8) + + cmpldi cr1,r8,16 + blt cr1,8f + + /* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + /* At least 6 bytes to go. */ + blt cr1,8f + + /* Copy 16 bytes. */ + ld r6,0(r4) + ld r8,8(r4) + addi r4,r4,16 + std r6,0(r11) + std r8,8(r11) + addi r11,r11,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4) + ld r6,0(r4) + addi r4,r4,8 + std r6,0(r11) + addi r11,r11,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + lwz r6,0(r4) + stw r6,0(r11) + bf 30,L(tail5) + lhz r7,4(r4) + sth r7,4(r11) + bflr 31 + lbz r8,6(r4) + stb r8,6(r11) + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + lhz r6,0(r4) + sth r6,0(r11) + bflr 31 + lbz r7,2(r4) + stb r7,2(r11) + blr + + .align 4 +L(tail5): + bf 31,1f + lbz r6,4(r4) + stb r6,4(r11) + blr + + .align 4 +1: + bflr 31 + lbz r6,0(r4) + stb r6,0(r11) + blr + +/* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,L(tail4) + ld r6,0(r4) + std r6,0(r11) + blr +END (FUNC_NAME) + +#ifndef USE_AS_STPCPY +libc_hidden_builtin_def (strcpy) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcspn.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcspn.S new file mode 100644 index 0000000000..c9a7a2e3c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcspn.S @@ -0,0 +1,20 @@ +/* Optimized strcspn implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STRCSPN 1 +#include <sysdeps/powerpc/powerpc64/power8/strspn.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strlen.S new file mode 100644 index 0000000000..8f4a1fc1dc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strlen.S @@ -0,0 +1,301 @@ +/* Optimized strlen implementation for PowerPC64/POWER8 using a vectorized + loop. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +/* int [r3] strlen (char *s [r3]) */ + +#ifndef STRLEN +# define STRLEN strlen +#endif + +/* TODO: change this to .machine power8 when the minimum required binutils + allows it. */ + .machine power7 +EALIGN (STRLEN, 4, 0) + CALL_MCOUNT 1 + dcbt 0,r3 + clrrdi r4,r3,3 /* Align the address to doubleword boundary. */ + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + ld r12,0(r4) /* Load doubleword from memory. */ +#ifdef __LITTLE_ENDIAN__ + sld r5,r5,r6 +#else + srd r5,r5,r6 /* MASK = MASK >> padding. */ +#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + /* For shorter strings (< 64 bytes), we will not use vector registers, + as the overhead isn't worth it. So, let's use GPRs instead. This + will be done the same way as we do in the POWER7 implementation. + Let's see if we are aligned to a quadword boundary. If so, we can + jump to the first (non-vectorized) loop. Otherwise, we have to + handle the next DWORD first. */ + mtcrf 0x01,r4 + mr r9,r4 + addi r9,r9,8 + bt 28,L(align64) + + /* Handle the next 8 bytes so we are aligned to a quadword + boundary. */ + ldu r5,8(r4) + cmpb r10,r5,r0 + cmpdi cr7,r10,0 + addi r9,r9,8 + bne cr7,L(done) + +L(align64): + /* Proceed to the old (POWER7) implementation, checking two doublewords + per iteraction. For the first 56 bytes, we will just check for null + characters. After that, we will also check if we are 64-byte aligned + so we can jump to the vectorized implementation. We will unroll + these loops to avoid excessive branching. */ + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + /* Are we 64-byte aligned? If so, jump to the vectorized loop. + Note: aligning to 64-byte will necessarily slow down performance for + strings around 64 bytes in length due to the extra comparisons + required to check alignment for the vectorized loop. This is a + necessary tradeoff we are willing to take in order to speed up the + calculation for larger strings. */ + andi. r10,r9,63 + beq cr0,L(preloop) + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + andi. r10,r9,63 + beq cr0,L(preloop) + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + andi. r10,r9,63 + beq cr0,L(preloop) + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + andi. r10,r9,63 + beq cr0,L(preloop) + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + + /* At this point, we are necessarily 64-byte aligned. If no zeroes were + found, jump to the vectorized loop. */ + beq cr7,L(preloop) + +L(dword_zero): + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r10,0 + addi r4,r4,-8 + bne cr6,L(done) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r11 + addi r4,r4,8 + + /* If the null byte was found in the non-vectorized code, compute the + final length. r10 has the output of the cmpb instruction, that is, + it contains 0xff in the same position as the null byte in the + original doubleword from the string. Use that to calculate the + length. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r9, r10,-1 /* Form a mask from trailing zeros. */ + andc r9, r9,r10 + popcntd r0, r9 /* Count the bits in the mask. */ +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r4 + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r5,r0 /* Compute final length. */ + blr + + /* Vectorized implementation starts here. */ + .p2align 4 +L(preloop): + /* Set up for the loop. */ + mr r4,r9 + li r7, 16 /* Load required offsets. */ + li r8, 32 + li r9, 48 + li r12, 8 + vxor v0,v0,v0 /* VR with null chars to use with + vcmpequb. */ + + /* Main loop to look for the end of the string. We will read in + 64-byte chunks. Align it to 32 bytes and unroll it 3 times to + leverage the icache performance. */ + .p2align 5 +L(loop): + lvx v1,r4,r0 /* Load 4 quadwords. */ + lvx v2,r4,r7 + lvx v3,r4,r8 + lvx v4,r4,r9 + vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ + vminub v6,v3,v4 + vminub v7,v5,v6 + vcmpequb. v7,v7,v0 /* Check for NULLs. */ + addi r4,r4,64 /* Adjust address for the next iteration. */ + bne cr6,L(vmx_zero) + + lvx v1,r4,r0 /* Load 4 quadwords. */ + lvx v2,r4,r7 + lvx v3,r4,r8 + lvx v4,r4,r9 + vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ + vminub v6,v3,v4 + vminub v7,v5,v6 + vcmpequb. v7,v7,v0 /* Check for NULLs. */ + addi r4,r4,64 /* Adjust address for the next iteration. */ + bne cr6,L(vmx_zero) + + lvx v1,r4,r0 /* Load 4 quadwords. */ + lvx v2,r4,r7 + lvx v3,r4,r8 + lvx v4,r4,r9 + vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ + vminub v6,v3,v4 + vminub v7,v5,v6 + vcmpequb. v7,v7,v0 /* Check for NULLs. */ + addi r4,r4,64 /* Adjust address for the next iteration. */ + beq cr6,L(loop) + +L(vmx_zero): + /* OK, we found a null byte. Let's look for it in the current 64-byte + block and mark it in its corresponding VR. */ + vcmpequb v1,v1,v0 + vcmpequb v2,v2,v0 + vcmpequb v3,v3,v0 + vcmpequb v4,v4,v0 + + /* We will now 'compress' the result into a single doubleword, so it + can be moved to a GPR for the final calculation. First, we + generate an appropriate mask for vbpermq, so we can permute bits into + the first halfword. */ + vspltisb v10,3 + lvsl v11,r0,r0 + vslb v10,v11,v10 + + /* Permute the first bit of each byte into bits 48-63. */ + VBPERMQ(v1,v1,v10) + VBPERMQ(v2,v2,v10) + VBPERMQ(v3,v3,v10) + VBPERMQ(v4,v4,v10) + + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v2,v2,v2,2 + vsldoi v3,v3,v3,4 + vsldoi v4,v4,v4,6 +#else + vsldoi v1,v1,v1,6 + vsldoi v2,v2,v2,4 + vsldoi v3,v3,v3,2 +#endif + + /* Merge the results and move to a GPR. */ + vor v1,v2,v1 + vor v2,v3,v4 + vor v4,v1,v2 + MFVRD(r10,v4) + + /* Adjust address to the begninning of the current 64-byte block. */ + addi r4,r4,-64 + +#ifdef __LITTLE_ENDIAN__ + addi r9, r10,-1 /* Form a mask from trailing zeros. */ + andc r9, r9,r10 + popcntd r0, r9 /* Count the bits in the mask. */ +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r4 + add r3,r5,r0 /* Compute final length. */ + blr + +END (STRLEN) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncase.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncase.S new file mode 100644 index 0000000000..32e09e4d94 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncase.S @@ -0,0 +1,20 @@ +/* Optimized strncasecmp implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STRNCASECMP 1 +#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncmp.S new file mode 100644 index 0000000000..3d8df90538 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncmp.S @@ -0,0 +1,327 @@ +/* Optimized strncmp implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +/* Implements the function + + int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n) + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment. Although recent powerpc64 uses + 64K as default, the page cross handling assumes minimum page size of + 4k. */ + + .machine power7 +EALIGN (STRNCMP, 4, 0) + /* Check if size is 0. */ + mr. r10,r5 + beq cr0,L(ret0) + + /* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using + the code: + + (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) + + with PAGE_SIZE being 4096 and ITER_SIZE begin 16. */ + rldicl r8,r3,0,52 + cmpldi cr7,r8,4096-16 + bgt cr7,L(pagecross) + rldicl r9,r4,0,52 + cmpldi cr7,r9,4096-16 + bgt cr7,L(pagecross) + + /* For short string up to 16 bytes, load both s1 and s2 using + unaligned dwords and compare. */ + ld r7,0(r3) + ld r9,0(r4) + li r8,0 + cmpb r8,r7,r8 + cmpb r6,r7,r9 + orc. r8,r8,r6 + bne cr0,L(different1) + + /* If the string compared are equal, but size is less or equal + to 8, return 0. */ + cmpldi cr7,r10,8 + li r9,0 + ble cr7,L(ret1) + addi r5,r10,-8 + + ld r7,8(r3) + ld r9,8(r4) + cmpb r8,r7,r8 + cmpb r6,r7,r9 + orc. r8,r8,r6 + bne cr0,L(different0) + + cmpldi cr7,r5,8 + mr r9,r8 + ble cr7,L(ret1) + + /* Update pointers and size. */ + addi r10,r10,-16 + addi r3,r3,16 + addi r4,r4,16 + + /* Now it has checked for first 16 bytes, align source1 to doubleword + and adjust source2 address. */ +L(align_8b): + rldicl r5,r3,0,61 + rldicr r3,r3,0,60 + subf r4,r5,r4 + add r10,r10,r5 + + /* At this point, source1 alignment is 0 and source2 alignment is + between 0 and 7. Check is source2 alignment is 0, meaning both + sources have the same alignment. */ + andi. r8,r4,0x7 + beq cr0,L(loop_eq_align_0) + + li r5,0 + b L(loop_ne_align_1) + + /* If source2 is unaligned to doubleword, the code needs to check + on each interation if the unaligned doubleword access will cross + a 4k page boundary. */ + .align 4 +L(loop_ne_align_0): + ld r7,0(r3) + ld r9,0(r4) + cmpb r8,r7,r5 + cmpb r6,r7,r9 + orc. r8,r8,r6 + bne cr0,L(different1) + + cmpldi cr7,r10,8 + ble cr7,L(ret0) + addi r10,r10,-8 + addi r3,r3,8 + addi r4,r4,8 +L(loop_ne_align_1): + rldicl r9,r4,0,52 + cmpldi r7,r9,4088 + ble cr7,L(loop_ne_align_0) + cmpdi cr7,r10,0 + beq cr7,L(ret0) + + lbz r9,0(r3) + lbz r8,0(r4) + cmplw cr7,r9,r8 + bne cr7,L(byte_ne_4) + cmpdi cr7,r9,0 + beq cr7,L(size_reached_0) + + li r9,r7 + addi r8,r3,1 + mtctr r9 + addi r4,r4,1 + addi r10,r10,-1 + addi r3,r3,8 + + /* The unaligned read of source2 will cross a 4K page boundary, + and the different byte or NULL maybe be in the remaining page + bytes. Since it can not use the unaligned load the algorithm + reads and compares 8 bytes to keep source1 doubleword aligned. */ + .align 4 +L(loop_ne_align_byte): + cmpdi cr7,r10,0 + addi r10,r10,-1 + beq cr7,L(ret0) + lbz r9,0(r8) + lbz r7,0(r4) + addi r8,r8,1 + addi r4,r4,1 + cmplw cr7,r9,r7 + cmpdi cr5,r9,0 + bne cr7,L(size_reached_2) + beq cr5,L(size_reached_0) + bdnz L(loop_ne_align_byte) + + cmpdi cr7,r10,0 + bne+ cr7,L(loop_ne_align_0) + + .align 4 +L(ret0): + li r9,0 +L(ret1): + mr r3,r9 + blr + + /* The code now check if r8 and r10 are different by issuing a + cmpb and shift the result based on its output: + + #ifdef __LITTLE_ENDIAN__ + leadzero = (__builtin_ffsl (z1) - 1); + leadzero = leadzero > (n-1)*8 ? (n-1)*8 : leadzero; + r1 = (r1 >> leadzero) & 0xFFUL; + r2 = (r2 >> leadzero) & 0xFFUL; + #else + leadzero = __builtin_clzl (z1); + leadzero = leadzero > (n-1)*8 ? (n-1)*8 : leadzero; + r1 = (r1 >> (56 - leadzero)) & 0xFFUL; + r2 = (r2 >> (56 - leadzero)) & 0xFFUL; + #endif + return r1 - r2; */ + + .align 4 +L(different0): + mr r10,r5 +#ifdef __LITTLE_ENDIAN__ +L(different1): + neg r11,r8 + sldi r10,r10,3 + and r8,r11,r8 + addi r10,r10,-8 + cntlzd r8,r8 + subfic r8,r8,63 + extsw r8,r8 + cmpld cr7,r8,r10 + ble cr7,L(different2) + mr r8,r10 +L(different2): + extsw r8,r8 +#else +L(different1): + addi r10,r10,-1 + cntlzd r8,r8 + sldi r10,r10,3 + cmpld cr7,r8,r10 + blt cr7,L(different2) + mr r8,r10 +L(different2): + subfic r8,r8,56 +#endif + srd r7,r7,r8 + srd r9,r9,r8 + rldicl r3,r7,0,56 + rldicl r9,r9,0,56 + subf r9,r9,3 + extsw r9,r9 + mr r3,r9 + blr + + /* If unaligned 16 bytes reads across a 4K page boundary, it uses + a simple byte a byte comparison until the page alignment for s1 + is reached. */ + .align 4 +L(pagecross): + lbz r7,0(r3) + lbz r9,0(r4) + subfic r8,r8,4095 + cmplw cr7,r9,r7 + bne cr7,L(byte_ne_3) + cmpdi cr7,r9,0 + beq cr7,L(byte_ne_0) + addi r10,r10,-1 + subf r7,r8,r10 + subf r9,r7,r10 + addi r9,r9,1 + mtctr r9 + b L(pagecross_loop1) + + .align 4 +L(pagecross_loop0): + beq cr7,L(ret0) + lbz r9,0(r3) + lbz r8,0(r4) + addi r10,r10,-1 + cmplw cr7,r9,r8 + cmpdi cr5,r9,0 + bne r7,L(byte_ne_2) + beq r5,L(byte_ne_0) +L(pagecross_loop1): + cmpdi cr7,r10,0 + addi r3,r3,1 + addi r4,r4,1 + bdnz L(pagecross_loop0) + cmpdi cr7,r7,0 + li r9,0 + bne+ cr7,L(align_8b) + b L(ret1) + + /* If both source1 and source2 are doubleword aligned, there is no + need for page boundary cross checks. */ + .align 4 +L(loop_eq_align_0): + ld r7,0(r3) + ld r9,0(r4) + cmpb r8,r7,r8 + cmpb r6,r7,r9 + orc. r8,r8,r6 + bne cr0,L(different1) + + cmpldi cr7,r10,8 + ble cr7,L(ret0) + addi r9,r10,-9 + + li r5,0 + srdi r9,r9,3 + addi r9,r9,1 + mtctr r9 + b L(loop_eq_align_2) + + .align 4 +L(loop_eq_align_1): + bdz L(ret0) +L(loop_eq_align_2): + ldu r7,8(r3) + addi r10,r10,-8 + ldu r9,8(r4) + cmpb r8,r7,r5 + cmpb r6,r7,r9 + orc. r8,r8,r6 + beq cr0,L(loop_eq_align_1) + b L(different1) + + .align 4 +L(byte_ne_0): + li r7,0 +L(byte_ne_1): + subf r9,r9,r7 + extsw r9,r9 + b L(ret1) + + .align 4 +L(byte_ne_2): + extsw r7,r9 + mr r9,r8 + b L(byte_ne_1) +L(size_reached_0): + li r10,0 +L(size_reached_1): + subf r9,r9,r10 + extsw r9,r9 + b L(ret1) +L(size_reached_2): + extsw r10,r9 + mr r9,r7 + b L(size_reached_1) +L(byte_ne_3): + extsw r7,r7 + b L(byte_ne_1) +L(byte_ne_4): + extsw r10,r9 + mr r9,r8 + b L(size_reached_1) +END(STRNCMP) +libc_hidden_builtin_def(strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncpy.S new file mode 100644 index 0000000000..6d40f30ff7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncpy.S @@ -0,0 +1,465 @@ +/* Optimized strncpy/stpncpy implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifdef USE_AS_STPNCPY +# ifndef STPNCPY +# define FUNC_NAME __stpncpy +# else +# define FUNC_NAME STPNCPY +# endif +#else +# ifndef STRNCPY +# define FUNC_NAME strncpy +# else +# define FUNC_NAME STRNCPY +# endif +#endif /* !USE_AS_STPNCPY */ + +#ifndef MEMSET +/* For builds without IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define MEMSET __GI_memset +# else +# define MEMSET memset +# endif +#endif + +#define FRAMESIZE (FRAME_MIN_SIZE+48) + +/* Implements the function + + char * [r3] strncpy (char *dest [r3], const char *src [r4], size_t n [r5]) + + or + + char * [r3] stpncpy (char *dest [r3], const char *src [r4], size_t n [r5]) + + if USE_AS_STPCPY is defined. + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment. Although recent powerpc64 uses + 64K as default, the page cross handling assumes minimum page size of + 4k. */ + + .machine power7 +EALIGN (FUNC_NAME, 4, 0) + + /* Check if the [src]+15 will cross a 4K page by checking if the bit + indicating the page size changes. Basically: + + uint64_t srcin = (uint64_t)src; + uint64_t ob = srcin & 4096UL; + uint64_t nb = (srcin+15UL) & 4096UL; + if (ob ^ nb) + goto pagecross; */ + + addi r10,r4,16 + rlwinm r9,r4,0,19,19 + + /* Save some non-volatile registers on the stack. */ + std r26,-48(r1) + std r27,-40(r1) + + rlwinm r8,r10,0,19,19 + + std r28,-32(r1) + std r29,-24(r1) + + cmpld cr7,r9,r8 + + std r30,-16(r1) + std r31,-8(r1) + + /* Update CFI. */ + cfi_offset(r26, -48) + cfi_offset(r27, -40) + cfi_offset(r28, -32) + cfi_offset(r29, -24) + cfi_offset(r30, -16) + cfi_offset(r31, -8) + + beq cr7,L(unaligned_lt_16) + rldicl r9,r4,0,61 + subfic r8,r9,8 + cmpld cr7,r5,r8 + bgt cr7,L(pagecross) + + /* At this points there is 1 to 15 bytes to check and write. Since it could + be either from first unaligned 16 bytes access or from bulk copy, the code + uses an unrolled byte read/write instead of trying to analyze the cmpb + results. */ +L(short_path): + mr r9,r3 +L(short_path_1): + /* Return if there are no more bytes to be written. */ + cmpdi cr7,r5,0 + beq cr7,L(short_path_loop_end_1) +L(short_path_2): + /* Copy one char from src (r4) and write it to dest (r9). If it is the + end-of-string, start the null padding. Continue, otherwise. */ + lbz r10,0(r4) + cmpdi cr7,r10,0 + stb r10,0(r9) + beq cr7,L(zero_pad_start_1) + /* If there are no more bytes to be written, return. */ + cmpdi cr0,r5,1 + addi r8,r9,1 + addi r6,r5,-1 + beq cr0,L(short_path_loop_end_0) + /* Copy another char from src (r4) to dest (r9). Check again if it is + the end-of-string. If so, start the null padding. */ + lbz r10,1(r4) + cmpdi cr7,r10,0 + stb r10,1(r9) + beq cr7,L(zero_pad_start_prepare_1) + /* Eagerly decrement r5 by 3, which is the number of bytes already + written, plus one write that will be performed later on. */ + addi r10,r5,-3 + b L(short_path_loop_1) + + .align 4 +L(short_path_loop): + /* At this point, the induction variable, r5, as well as the pointers + to dest and src (r9 and r4, respectivelly) have been updated. + + Note: The registers r7 and r10 are induction variables derived from + r5. They are used to determine if the total number of writes has + been reached at every other write. + + Copy one char from src (r4) and write it to dest (r9). If it is the + end-of-string, start the null padding. Continue, otherwise. */ + lbz r8,0(r4) + addi r7,r10,-2 + cmpdi cr5,r8,0 + stb r8,0(r9) + beq cr5,L(zero_pad_start_1) + beq cr7,L(short_path_loop_end_0) + /* Copy another char from src (r4) to dest (r9). Check again if it is + the end-of-string. If so, start the null padding. */ + lbz r8,1(r4) + cmpdi cr7,r8,0 + stb r8,1(r9) + beq cr7,L(zero_pad_start) + mr r10,r7 +L(short_path_loop_1): + /* This block is reached after two chars have been already written to + dest. Nevertheless, r5 (the induction variable), r9 (the pointer to + dest), and r4 (the pointer to src) have not yet been updated. + + At this point: + r5 holds the count of bytes yet to be written plus 2. + r9 points to the last two chars that were already written to dest. + r4 points to the last two chars that were already copied from src. + + The algorithm continues by decrementing r5, the induction variable, + so that it reflects the last two writes. The pointers to dest (r9) + and to src (r4) are increment by two, for the same reason. + + Note: Register r10 is another induction variable, derived from r5, + which determines if the total number of writes has been reached. */ + addic. r5,r5,-2 + addi r9,r9,2 + cmpdi cr7,r10,0 /* Eagerly check if the next write is the last. */ + addi r4,r4,2 + addi r6,r9,1 + bne cr0,L(short_path_loop) /* Check if the total number of writes + has been reached at every other + write. */ +#ifdef USE_AS_STPNCPY + mr r3,r9 + b L(short_path_loop_end) +#endif + +L(short_path_loop_end_0): +#ifdef USE_AS_STPNCPY + addi r3,r9,1 + b L(short_path_loop_end) +#endif +L(short_path_loop_end_1): +#ifdef USE_AS_STPNCPY + mr r3,r9 +#endif +L(short_path_loop_end): + /* Restore non-volatile registers. */ + ld r26,-48(r1) + ld r27,-40(r1) + ld r28,-32(r1) + ld r29,-24(r1) + ld r30,-16(r1) + ld r31,-8(r1) + blr + + /* This code pads the remainder of dest with NULL bytes. The algorithm + calculates the remaining size and calls memset. */ + .align 4 +L(zero_pad_start): + mr r5,r10 + mr r9,r6 +L(zero_pad_start_1): + /* At this point: + - r5 holds the number of bytes that still have to be written to + dest. + - r9 points to the position, in dest, where the first null byte + will be written. + The above statements are true both when control reaches this label + from a branch or when falling through the previous lines. */ +#ifndef USE_AS_STPNCPY + mr r30,r3 /* Save the return value of strncpy. */ +#endif + /* Prepare the call to memset. */ + mr r3,r9 /* Pointer to the area to be zero-filled. */ + li r4,0 /* Byte to be written (zero). */ + + /* We delayed the creation of the stack frame, as well as the saving of + the link register, because only at this point, we are sure that + doing so is actually needed. */ + + /* Save the link register. */ + mflr r0 + std r0,16(r1) + cfi_offset(lr, 16) + + /* Create the stack frame. */ + stdu r1,-FRAMESIZE(r1) + cfi_adjust_cfa_offset(FRAMESIZE) + + bl MEMSET + nop + + /* Restore the stack frame. */ + addi r1,r1,FRAMESIZE + cfi_adjust_cfa_offset(-FRAMESIZE) + /* Restore the link register. */ + ld r0,16(r1) + mtlr r0 + +#ifndef USE_AS_STPNCPY + mr r3,r30 /* Restore the return value of strncpy, i.e.: + dest. For stpncpy, the return value is the + same as return value of memset. */ +#endif + + /* Restore non-volatile registers and return. */ + ld r26,-48(r1) + ld r27,-40(r1) + ld r28,-32(r1) + ld r29,-24(r1) + ld r30,-16(r1) + ld r31,-8(r1) + blr + + /* The common case where [src]+16 will not cross a 4K page boundary. + In this case the code fast check the first 16 bytes by using doubleword + read/compares and update destiny if neither total size or null byte + is found in destiny. */ + .align 4 +L(unaligned_lt_16): + cmpldi cr7,r5,7 + ble cr7,L(short_path) + ld r7,0(r4) + li r8,0 + cmpb r8,r7,r8 + cmpdi cr7,r8,0 + bne cr7,L(short_path_prepare_2) + addi r6,r5,-8 + std r7,0(r3) + addi r9,r3,8 + cmpldi cr7,r6,7 + addi r7,r4,8 + ble cr7,L(short_path_prepare_1_1) + ld r4,8(r4) + cmpb r8,r4,r8 + cmpdi cr7,r8,0 + bne cr7,L(short_path_prepare_2_1) + std r4,8(r3) + addi r29,r3,16 + addi r5,r5,-16 + /* Neither the null byte was found or total length was reached, + align to 16 bytes and issue a bulk copy/compare. */ + b L(align_to_16b) + + /* In the case of 4k page boundary cross, the algorithm first align + the address to a doubleword, calculate a mask based on alignment + to ignore the bytes and continue using doubleword. */ + .align 4 +L(pagecross): + rldicr r11,r4,0,59 /* Align the address to 8 bytes boundary. */ + li r6,-1 /* MASK = 0xffffffffffffffffUL. */ + sldi r9,r9,3 /* Calculate padding. */ + ld r7,0(r11) /* Load doubleword from memory. */ +#ifdef __LITTLE_ENDIAN__ + sld r9,r6,r9 /* MASK = MASK << padding. */ +#else + srd r9,r6,r9 /* MASK = MASK >> padding. */ +#endif + orc r9,r7,r9 /* Mask bits that are not part of the + string. */ + li r7,0 + cmpb r9,r9,r7 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r9,0 + bne cr7,L(short_path_prepare_2) + subf r8,r8,r5 /* Adjust total length. */ + cmpldi cr7,r8,8 /* Check if length was reached. */ + ble cr7,L(short_path_prepare_2) + + /* For next checks we have aligned address, so we check for more + three doublewords to make sure we can read 16 unaligned bytes + to start the bulk copy with 16 aligned addresses. */ + ld r7,8(r11) + cmpb r9,r7,r9 + cmpdi cr7,r9,0 + bne cr7,L(short_path_prepare_2) + addi r7,r8,-8 + cmpldi cr7,r7,8 + ble cr7,L(short_path_prepare_2) + ld r7,16(r11) + cmpb r9,r7,r9 + cmpdi cr7,r9,0 + bne cr7,L(short_path_prepare_2) + addi r8,r8,-16 + cmpldi cr7,r8,8 + ble cr7,L(short_path_prepare_2) + ld r8,24(r11) + cmpb r9,r8,r9 + cmpdi cr7,r9,0 + bne cr7,L(short_path_prepare_2) + + /* No null byte found in the 32 bytes readed and length not reached, + read source again using unaligned loads and store them. */ + ld r9,0(r4) + addi r29,r3,16 + addi r5,r5,-16 + std r9,0(r3) + ld r9,8(r4) + std r9,8(r3) + + /* Align source to 16 bytes and adjust destiny and size. */ +L(align_to_16b): + rldicl r9,r10,0,60 + rldicr r28,r10,0,59 + add r12,r5,r9 + subf r29,r9,r29 + + /* The bulk read/compare/copy loads two doublewords, compare and merge + in a single register for speed. This is an attempt to speed up the + null-checking process for bigger strings. */ + + cmpldi cr7,r12,15 + ble cr7,L(short_path_prepare_1_2) + + /* Main loop for large sizes, unrolled 2 times to get better use of + pipeline. */ + ld r8,0(28) + ld r10,8(28) + li r9,0 + cmpb r7,r8,r9 + cmpb r9,r10,r9 + or. r6,r9,r7 + bne cr0,L(short_path_prepare_2_3) + addi r5,r12,-16 + addi r4,r28,16 + std r8,0(r29) + std r10,8(r29) + cmpldi cr7,r5,15 + addi r9,r29,16 + ble cr7,L(short_path_1) + mr r11,r28 + mr r6,r29 + li r30,0 + subfic r26,r4,48 + subfic r27,r9,48 + + b L(loop_16b) + + .align 4 +L(loop_start): + ld r31,0(r11) + ld r10,8(r11) + cmpb r0,r31,r7 + cmpb r8,r10,r7 + or. r7,r0,r8 + addi r5,r5,-32 + cmpldi cr7,r5,15 + add r4,r4,r26 + add r9,r9,r27 + bne cr0,L(short_path_prepare_2_2) + add r4,r28,r4 + std r31,0(r6) + add r9,r29,r9 + std r10,8(r6) + ble cr7,L(short_path_1) + +L(loop_16b): + ld r10,16(r11) + ld r0,24(r11) + cmpb r8,r10,r30 + cmpb r7,r0,r30 + or. r7,r8,r7 + addi r12,r12,-32 + cmpldi cr7,r12,15 + addi r11,r11,32 + bne cr0,L(short_path_2) + std r10,16(r6) + addi r6,r6,32 + std r0,-8(r6) + bgt cr7,L(loop_start) + + mr r5,r12 + mr r4,r11 + mr r9,r6 + b L(short_path_1) + + .align 4 +L(short_path_prepare_1_1): + mr r5,r6 + mr r4,r7 + b L(short_path_1) +L(short_path_prepare_1_2): + mr r5,r12 + mr r4,r28 + mr r9,r29 + b L(short_path_1) +L(short_path_prepare_2): + mr r9,r3 + b L(short_path_2) +L(short_path_prepare_2_1): + mr r5,r6 + mr r4,r7 + b L(short_path_2) +L(short_path_prepare_2_2): + mr r5,r12 + mr r4,r11 + mr r9,r6 + b L(short_path_2) +L(short_path_prepare_2_3): + mr r5,r12 + mr r4,r28 + mr r9,r29 + b L(short_path_2) +L(zero_pad_start_prepare_1): + mr r5,r6 + mr r9,r8 + b L(zero_pad_start_1) +END (FUNC_NAME) + +#ifndef USE_AS_STPNCPY +libc_hidden_builtin_def (strncpy) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strnlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strnlen.S new file mode 100644 index 0000000000..3eadbfb09e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strnlen.S @@ -0,0 +1,433 @@ +/* Optimized strnlen implementation for POWER8 using a vmx loop. + + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* It is implemented the following heuristic: + 1. Case maxlen <= 32: align the pointer to 8 bytes to loop through + reading doublewords. Uses the POWER7 algorithm. + 2. Case maxlen > 32: check for null bytes in the first 16 bytes using + unaligned accesses. Return length if found. Otherwise: + 2.1 Case maxlen < 64: deduct the bytes previously read, align + the pointer to 16 bytes and loop through reading quadwords + until find null bytes or reach maxlen. + 2.2 Case maxlen > 64: deduct the bytes previously read, align + the pointer to 64 bytes and set up a counter to loop through + reading in strides of 64 bytes. In case it finished the loop + with null bytes not found, process the remainder bytes by + switching to the loop to heuristic in 2.1. */ + +#include <sysdep.h> + +/* Define default page size to 4KB. */ +#define PAGE_SIZE 4096 + +/* The following macros implement Power ISA v2.07 opcodes + that could not be used directly into this code to the keep + compatibility with older binutils versions. */ + +/* Move from vector register doubleword. */ +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) + +/* Move to vector register doubleword. */ +#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) + +/* Vector Bit Permute Quadword. */ +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +/* Vector Population Count Halfword. */ +#define VPOPCNTH(t,b) .long (0x10000743 | ((t)<<(32-11)) | ((b)<<(32-21))) + +/* Vector Count Leading Zeros Halfword. */ +#define VCLZH(t,b) .long (0x10000742 | ((t)<<(32-11)) | ((b)<<(32-21))) + + +/* int [r3] strnlen (char *s [r3], size_t maxlen [r4]) */ +/* TODO: change to power8 when minimum required binutils allows it. */ + .machine power7 +ENTRY (__strnlen) + CALL_MCOUNT 2 + dcbt 0,r3 + + cmpldi r4,32 /* Check if maxlen <= 32. */ + ble L(small_range) /* If maxlen <= 32. */ + + /* Upcoming 16 bytes unaligned accesses cannot cross the page boundary + otherwise the processor throws an memory access error. + Use following code to check there is room for such as accesses: + (((size_t) s) % PAGE_SIZE > (PAGE_SIZE - 16) + If it is disallowed then switch to the code that handles + the string when maxlen <= 32. */ + clrldi r10,r3,52 + cmpldi cr7,r10,PAGE_SIZE-16 + bgt cr7,L(small_range) /* If less than 16B of page end. */ + + /* Compute our permute constant r8. */ + li r7,0 + /* Compute a bpermd constant to move bit 0 of each word into + a halfword value, and count trailing zeros. */ +#ifdef __LITTLE_ENDIAN__ + li r8,0x2820 + oris r8,r8,0x3830 + sldi r8,r8,32 + ori r8,r8,0x0800 + oris r8,r8,0x1810 +#else + li r8,0x1018 + oris r8,r8,0x0008 + sldi r8,r8,32 + ori r8,r8,0x3038 + oris r8,r8,0x2028 +#endif + + /* maxlen > 32. Optimistically check for null bytes in the first + 16 bytes of the string using unaligned accesses. */ + ld r5,0(r3) + ld r6,8(r3) + cmpb r10,r7,r5 /* Check for null bytes in DWORD1. */ + cmpb r11,r7,r6 /* Check for null bytes in DWORD2. */ + or. r7,r10,r11 + bne cr0, L(early_find) /* If found null bytes. */ + + /* At this point maxlen > 32 and null bytes were not found at first + 16 bytes. Prepare for loop using VMX. */ + + /* r3 == s, r4 == maxlen. All other volatile regs are unused now. */ + + addi r5,r3,16 /* Align up, or just add the 16B we + already checked. */ + li r0,15 + and r7,r5,r0 /* Find offset into 16B alignment. */ + andc r5,r5,r0 /* Quadword align up s to the next quadword. */ + li r0,16 + subf r0,r7,r0 + subf r4,r0,r4 /* Deduct unaligned bytes from maxlen. */ + + + /* Compute offsets for vmx loads, and precompute the vbpermq + constants for both the 64B and 16B loops. */ + li r6,0 + vspltisb v0,0 + vspltisb v10,3 + lvsl v11,r6,r6 + vslb v10,v11,v10 + + cmpldi r4,64 /* Check maxlen < 64. */ + blt L(smaller) /* If maxlen < 64 */ + + /* In order to begin the 64B loop, it needs to be 64 + bytes aligned. So read quadwords until it is aligned or found null + bytes. At worst case it will be aligned after the fourth iteration, + so unroll the loop to avoid counter checking. */ + andi. r7,r5,63 /* Check if is 64 bytes aligned. */ + beq cr0,L(preloop_64B) /* If it is already 64B aligned. */ + lvx v1,r5,r6 + vcmpequb. v1,v1,v0 + addi r5,r5,16 + addi r4,r4,-16 /* Decrement maxlen in 16 bytes. */ + bne cr6,L(found_aligning64B) /* If found null bytes. */ + + /* Unroll 3x above code block until aligned or find null bytes. */ + andi. r7,r5,63 + beq cr0,L(preloop_64B) + lvx v1,r5,r6 + vcmpequb. v1,v1,v0 + addi r5,r5,16 + addi r4,r4,-16 + bne cr6,L(found_aligning64B) + + andi. r7,r5,63 + beq cr0,L(preloop_64B) + lvx v1,r5,r6 + vcmpequb. v1,v1,v0 + addi r5,r5,16 + addi r4,r4,-16 + bne cr6,L(found_aligning64B) + + andi. r7,r5,63 + beq cr0,L(preloop_64B) + lvx v1,r5,r6 + vcmpequb. v1,v1,v0 + addi r5,r5,16 + addi r4,r4,-16 + bne cr6,L(found_aligning64B) + + /* At this point it should be 16 bytes aligned. + Prepare for the 64B loop. */ + .p2align 4 +L(preloop_64B): + /* Check if maxlen became is less than 64, therefore disallowing the + 64B loop. If it happened switch to the 16B loop code. */ + cmpldi r4,64 /* Check if maxlen < 64. */ + blt L(smaller) /* If maxlen < 64. */ + /* Set some constant values. */ + li r7,16 + li r10,32 + li r9,48 + + /* Compute the number of 64 bytes iterations needed. */ + srdi r11,r4,6 /* Compute loop count (maxlen / 64). */ + andi. r4,r4,63 /* Set maxlen the remainder (maxlen % 64). */ + mtctr r11 /* Move loop count to counter register. */ + + /* Handle maxlen > 64. Loop over the bytes in strides of 64B. */ + .p2align 4 +L(loop_64B): + lvx v1,r5,r6 /* r5 is the pointer to s. */ + lvx v2,r5,r7 + lvx v3,r5,r10 + lvx v4,r5,r9 + /* Compare the four 16B vectors to obtain the least 16 values. + Null bytes should emerge into v7, then check for null bytes. */ + vminub v5,v1,v2 + vminub v6,v3,v4 + vminub v7,v5,v6 + vcmpequb. v7,v7,v0 /* Check for null bytes. */ + addi r5,r5,64 /* Add pointer to next iteraction. */ + bne cr6,L(found_64B) /* If found null bytes. */ + bdnz L(loop_64B) /* Continue the loop if count > 0. */ + +/* Hit loop end without null match. So branch to handle the remainder. */ + + /* Prepare a 16B loop to handle two cases: + 1. If 32 > maxlen < 64. + 2. If maxlen >= 64, and reached end of the 64B loop with null + bytes not found. Thus handle the remainder bytes here. */ + .p2align 4 +L(smaller): + cmpldi r4,0 /* Check maxlen is zero. */ + beq L(done) /* If maxlen is zero. */ + + /* Place rounded up number of qw's to check into a vmx + register, and use some vector tricks to minimize + branching. */ + MTVRD(v7,r4) /* Copy maxlen from GPR to vector register. */ + vspltisb v5,1 + vspltisb v6,15 + vspltb v2,v7,7 + vaddubs v3,v5,v6 + +#ifdef __LITTLE_ENDIAN__ + vspltish v5,1 /* Compute 16 in each byte. */ +#endif + + /* Loop in 16B aligned incremements now. */ + .p2align 4 +L(loop_16B): + lvx v1,r5,r6 /* Load quadword into vector register. */ + addi r5,r5,16 /* Increment address to next 16B block. */ + vor v7,v2,v2 /* Save loop count (v2) into v7. */ + vsububs v2,v2,v3 /* Subtract 16B from count, saturate at 0. */ + vminub v4,v1,v2 + vcmpequb. v4,v4,v0 /* Checking for null bytes. */ + beq cr6,L(loop_16B) /* If null bytes not found. */ + + vcmpequb v1,v1,v0 + VBPERMQ(v1,v1,v10) +#ifdef __LITTLE_ENDIAN__ + vsubuhm v2,v1,v5 /* Form a mask of trailing zeros. */ + vandc v2,v2,v1 + VPOPCNTH(v1,v2) /* Count of trailing zeros, 16 if none. */ +#else + VCLZH(v1,v1) /* Count the leading zeros, 16 if none. */ +#endif + /* Truncate to maximum allowable offset. */ + vcmpgtub v2,v1,v7 /* Compare and truncate for matches beyond + maxlen. */ + vsel v1,v1,v7,v2 /* 0-16 is now in byte 7. */ + + MFVRD(r0,v1) + addi r5,r5,-16 /* Undo speculative bump. */ + extsb r0,r0 /* Clear whatever gunk is in the high 56b. */ + add r5,r5,r0 /* Add the offset of whatever was found. */ +L(done): + subf r3,r3,r5 /* Length is equal to the offset of null byte + matched minus the pointer to s. */ + blr /* Done. */ + + /* Handle case of maxlen > 64 and found null bytes in last block + of 64 bytes read. */ + .p2align 4 +L(found_64B): + /* A zero was found. Reduce the result. */ + vcmpequb v1,v1,v0 + vcmpequb v2,v2,v0 + vcmpequb v3,v3,v0 + vcmpequb v4,v4,v0 + + /* Permute the first bit of each byte into bits 48-63. */ + VBPERMQ(v1,v1,v10) + VBPERMQ(v2,v2,v10) + VBPERMQ(v3,v3,v10) + VBPERMQ(v4,v4,v10) + + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v2,v2,v2,2 + vsldoi v3,v3,v3,4 + vsldoi v4,v4,v4,6 +#else + vsldoi v1,v1,v1,6 + vsldoi v2,v2,v2,4 + vsldoi v3,v3,v3,2 +#endif + + /* Merge the results and move to a GPR. */ + vor v1,v2,v1 + vor v2,v3,v4 + vor v4,v1,v2 + + /* Adjust address to the start of the current 64B block. */ + addi r5,r5,-64 + + MFVRD(r10,v4) +#ifdef __LITTLE_ENDIAN__ + addi r9,r10,-1 /* Form a mask from trailing zeros. */ + andc r9,r9,r10 + popcntd r0,r9 /* Count the bits in the mask. */ +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r5 + add r3,r5,r0 /* Compute final length. */ + blr /* Done. */ + + /* Handle case where null bytes were found while aligning + as a preparation for the 64B loop. */ + .p2align 4 +L(found_aligning64B): + VBPERMQ(v1,v1,v10) +#ifdef __LITTLE_ENDIAN__ + MFVRD(r10,v1) + addi r9,r10,-1 /* Form a mask from trailing zeros. */ + andc r9,r9,r10 + popcntd r0,r9 /* Count the bits in the mask. */ +#else + vsldoi v1,v1,v1,6 + MFVRD(r10,v1) + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + addi r5,r5,-16 /* Adjust address to offset of last 16 bytes + read. */ + /* Calculate length as subtracted the pointer to s of last 16 bytes + offset, added with the bytes before the match. */ + subf r5,r3,r5 + add r3,r5,r0 + blr /* Done. */ + + /* Handle case of maxlen > 32 and found a null bytes within the first + 16 bytes of s. */ + .p2align 4 +L(early_find): + bpermd r5,r8,r10 /* r8 contains the bit permute constants. */ + bpermd r6,r8,r11 + sldi r5,r5,8 + or r5,r5,r6 /* r5 should hold a 16B mask of + a potential 0. */ + cntlzd r5,r5 /* Count leading zeros. */ + addi r3,r5,-48 /* Deduct the 48 leading zeros always + present. */ + blr /* Done. */ + + /* Handle case of maxlen <= 32. Use the POWER7 algorithm. */ + .p2align 4 +L(small_range): + clrrdi r8,r3,3 /* Align the pointer to 8B. */ + li r0,0 + /* Register's content at this point: + r3 == pointer to s, r4 == maxlen, r8 == pointer to s aligned to 8B, + r7 == last acceptable address. */ + cmpldi r4,0 /* Check if maxlen is zero. */ + beq L(end_max) /* If maxlen is zero. */ + + /* Calculate the last acceptable address and check for possible + addition overflow by using satured math: + r7 = r3 + r4 + r7 |= -(r7 < x) */ + add r7,r3,r4 + subfc r6,r3,r7 + subfe r9,r9,r9 + extsw r6,r9 + or r7,r7,r6 + addi r7,r7,-1 + + clrrdi r7,r7,3 /* Align to 8B address of last + acceptable address. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load aligned doubleword. */ + cmpb r10,r12,r0 /* Check for null bytes. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + sld r10,r10,r6 +#else + sld r10,r10,r6 + srd r10,r10,r6 +#endif /* __LITTLE_ENDIAN__ */ + cmpldi cr7,r10,0 + bne cr7,L(done_small) /* If found null byte. */ + + cmpld r8,r7 /* Check if reached maxlen. */ + beq L(end_max) /* If reached maxlen. */ + + /* Still handling case of maxlen <= 32. Read doubleword aligned until + find null bytes or reach maxlen. */ + .p2align 4 +L(loop_small): + ldu r12,8(r8) /* Load next doubleword and update r8. */ + cmpb r10,r12,r0 /* Check for null bytes. */ + cmpldi cr6,r10,0 + bne cr6,L(done_small) /* If found null bytes. */ + cmpld r8,r7 /* Check if reached maxlen. */ + bne L(loop_small) /* If it has more bytes to read. */ + mr r3,r4 /* Reached maxlen with null bytes not found. + Length is equal to maxlen. */ + blr /* Done. */ + + /* Still handling case of maxlen <= 32. Found null bytes. + Registers: r10 == match bits within doubleword, r8 == address of + last doubleword read, r3 == pointer to s, r4 == maxlen. */ + .p2align 4 +L(done_small): +#ifdef __LITTLE_ENDIAN__ + /* Count trailing zeros. */ + addi r0,r10,-1 + andc r0,r0,r10 + popcntd r0,r0 +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + sub r3,r8,r3 /* Calculate total of bytes before the match. */ + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r3,r0 /* Length until the match. */ + cmpld r3,r4 /* Check length is greater than maxlen. */ + blelr + mr r3,r4 /* If length is greater than maxlen, return + maxlen. */ + blr + + /* Handle case of reached maxlen with null bytes not found. */ + .p2align 4 +L(end_max): + mr r3,r4 /* Length is equal to maxlen. */ + blr /* Done. */ + + +END (__strnlen) +libc_hidden_def (__strnlen) +weak_alias (__strnlen, strnlen) +libc_hidden_def (strnlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strrchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strrchr.S new file mode 100644 index 0000000000..8eb74853c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strrchr.S @@ -0,0 +1,464 @@ +/* Optimized strrchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* char *[r3] strrchr (char *s [r3], int c [r4]) */ +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) +#define VCLZD(r,v) .long (0x100007c2 | ((r)<<(32-11)) | ((v)<<(32-21))) +#define VPOPCNTD(r,v) .long (0x100007c3 | ((r)<<(32-11)) | ((v)<<(32-21))) +#define VADDUQM(t,a,b) .long (0x10000100 \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) +#ifdef __LITTLE_ENDIAN__ +/* Find the match position from v6 and place result in r6. */ +# define CALCULATE_MATCH() \ + VBPERMQ(v6, v6, v10); \ + vsldoi v6, v6, v6, 6; \ + MFVRD(r7, v6); \ + cntlzd r6, r7; \ + subfic r6, r6, 15; +/* + * Find the first null position to mask bytes after null. + * (reg): vcmpequb result: v2 for 1st qw v3 for 2nd qw. + * Result placed at v2. + */ +# define FIND_NULL_POS(reg) \ + vspltisb v11, -1; \ + VADDUQM(v11, reg, v11); \ + vandc v11, v11, reg; \ + VPOPCNTD(v2, v11); \ + vspltb v11, v2, 15; \ + vcmpequb. v11, v11, v9; \ + blt cr6, 1f; \ + vsldoi v9, v0, v9, 1; \ + vslo v2, v2, v9; \ +1: \ + vsumsws v2, v2, v0; +#else +# define CALCULATE_MATCH() \ + VBPERMQ(v6, v6, v10); \ + MFVRD(r7, v6); \ + addi r6, r7, -1; \ + andc r6, r6, r7; \ + popcntd r6, r6; \ + subfic r6, r6, 15; +# define FIND_NULL_POS(reg) \ + VCLZD(v2, reg); \ + vspltb v11, v2, 7; \ + vcmpequb. v11, v11, v9; \ + blt cr6, 1f; \ + vsldoi v9, v0, v9, 1; \ + vsro v2, v2, v9; \ +1: \ + vsumsws v2, v2, v0; +#endif /* !__LITTLE_ENDIAN__ */ + .machine power7 +ENTRY (strrchr) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r9,0 /* Used to store last occurence. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* r4 is changed now. If it's passed more chars, then + check for null again. */ + cmpdi cr7,r4,0 + beq cr7,L(null_match) + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + +L(align): + andi. r12, r8, 15 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bne cr0, L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r7,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r7,r4 + cmpb r7,r7,r0 + or r12,r10,r11 + or r5,r6,r7 + or r5,r12,r5 + cmpdi cr7,r5,0 + beq cr7,L(vector) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + cmpdi cr6,r12,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 + + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ + +L(done): + /* If there are more than one 0xff in r11, find the first position of + 0xff in r11 and fill r10 with 0 from that position. */ + cmpdi cr7,r11,0 + beq cr7,L(no_null) +#ifdef __LITTLE_ENDIAN__ + addi r3,r11,-1 + andc r3,r3,r11 + popcntd r0,r3 +#else + cntlzd r0,r11 +#endif + subfic r0,r0,63 + li r6,-1 +#ifdef __LITTLE_ENDIAN__ + srd r0,r6,r0 +#else + sld r0,r6,r0 +#endif + and r10,r0,r10 +L(no_null): +#ifdef __LITTLE_ENDIAN__ + cntlzd r0,r10 /* Count leading zeros before c matches. */ + addi r3,r10,-1 + andc r3,r3,r10 + addi r10,r11,-1 + andc r10,r10,r11 + cmpld cr7,r3,r10 + bgt cr7,L(no_match) +#else + addi r3,r10,-1 /* Count trailing zeros before c matches. */ + andc r3,r3,r10 + popcntd r0,r3 + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + subfic r0,r0,7 + add r9,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + li r0,0 + cmpdi cr7,r11,0 /* If r11 == 0, no null's have been found. */ + beq cr7,L(align) + + .align 4 +L(no_match): + mr r3,r9 + blr + +/* Check the first 32B in GPR's and move to vectorized loop. */ + .p2align 5 +L(vector): + addi r3, r8, 8 + /* Make sure 32B aligned. */ + andi. r10, r3, 31 + bne cr0, L(loop) + vspltisb v0, 0 + /* Precompute vbpermq constant. */ + vspltisb v10, 3 + lvsl v11, r0, r0 + vslb v10, v11, v10 + MTVRD(v1, r4) + li r5, 16 + vspltb v1, v1, 7 + /* Compare 32 bytes in each loop. */ +L(continue): + lvx v4, 0, r3 + lvx v5, r3, r5 + vcmpequb v2, v0, v4 + vcmpequb v3, v0, v5 + vcmpequb v6, v1, v4 + vcmpequb v7, v1, v5 + vor v8, v2, v3 + vor v9, v6, v7 + vor v11, v8, v9 + vcmpequb. v11, v0, v11 + addi r3, r3, 32 + blt cr6, L(continue) + vcmpequb. v8, v0, v8 + blt cr6, L(match) + + /* One (or both) of the quadwords contains c/null. */ + vspltisb v8, 2 + vspltisb v9, 5 + /* Precompute values used for comparison. */ + vsl v9, v8, v9 /* v9 = 0x4040404040404040. */ + vaddubm v8, v9, v9 + vsldoi v8, v0, v8, 1 /* v8 = 0x80. */ + + /* Check if null is in second qw. */ + vcmpequb. v11, v0, v2 + blt cr6, L(secondqw) + + /* Null found in first qw. */ + addi r8, r3, -32 + /* Calculate the null position. */ + FIND_NULL_POS(v2) + /* Check if null is in the first byte. */ + vcmpequb. v11, v0, v2 + blt cr6, L(no_match) + vsububm v2, v8, v2 + /* Mask unwanted bytes after null. */ +#ifdef __LITTLE_ENDIAN__ + vslo v6, v6, v2 + vsro v6, v6, v2 +#else + vsro v6, v6, v2 + vslo v6, v6, v2 +#endif + vcmpequb. v11, v0, v6 + blt cr6, L(no_match) + /* Found a match before null. */ + CALCULATE_MATCH() + add r3, r8, r6 + blr + +L(secondqw): + addi r8, r3, -16 + FIND_NULL_POS(v3) + vcmpequb. v11, v0, v2 + blt cr6, L(no_match1) + vsububm v2, v8, v2 + /* Mask unwanted bytes after null. */ +#ifdef __LITTLE_ENDIAN__ + vslo v7, v7, v2 + vsro v7, v7, v2 +#else + vsro v7, v7, v2 + vslo v7, v7, v2 +#endif + vcmpequb. v11, v0, v7 + blt cr6, L(no_match1) + addi r8, r8, 16 + vor v6, v0, v7 +L(no_match1): + addi r8, r8, -16 + vcmpequb. v11, v0, v6 + blt cr6, L(no_match) + /* Found a match before null. */ + CALCULATE_MATCH() + add r3, r8, r6 + blr + +L(match): + /* One (or both) of the quadwords contains a match. */ + mr r8, r3 + vcmpequb. v8, v0, v7 + blt cr6, L(firstqw) + /* Match found in second qw. */ + addi r8, r8, 16 + vor v6, v0, v7 +L(firstqw): + addi r8, r8, -32 + CALCULATE_MATCH() + add r9, r8, r6 /* Compute final length. */ + b L(continue) +/* We are here because strrchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + andi. r12, r8, 15 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bne cr0, L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(vector1) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +/* Check the first 32B in GPR's and move to vectorized loop. */ + .p2align 5 +L(vector1): + addi r3, r8, 8 + /* Make sure 32B aligned. */ + andi. r10, r3, 31 + bne cr0, L(loop_null) + vspltisb v0, 0 + /* Precompute vbpermq constant. */ + vspltisb v10, 3 + lvsl v11, r0, r0 + vslb v10, v11, v10 + li r5, 16 + /* Compare 32 bytes in each loop. */ +L(continue1): + lvx v4, 0, r3 + lvx v5, r3, r5 + vcmpequb v2, v0, v4 + vcmpequb v3, v0, v5 + vor v8, v2, v3 + vcmpequb. v11, v0, v8 + addi r3, r3, 32 + blt cr6, L(continue1) + addi r3, r3, -32 + VBPERMQ(v2, v2, v10) + VBPERMQ(v3, v3, v10) + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v3, v3, v3, 2 +#else + vsldoi v2, v2, v2, 6 + vsldoi v3, v3, v3, 4 +#endif + /* Merge the results and move to a GPR. */ + vor v4, v3, v2 + MFVRD(r5, v4) +#ifdef __LITTLE_ENDIAN__ + addi r6, r5, -1 + andc r6, r6, r5 + popcntd r6, r6 +#else + cntlzd r6, r5 /* Count leading zeros before the match. */ +#endif + add r3, r3, r6 /* Compute final length. */ + blr +END (strrchr) +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strrchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strspn.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strspn.S new file mode 100644 index 0000000000..e9271898f2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strspn.S @@ -0,0 +1,202 @@ +/* Optimized strspn implementation for Power8. + + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* size_t [r3] strspn (const char *string [r3], + const char *needleAccept [r4]) */ + +/* This takes a novel approach by computing a 256 bit mask whereby + each set bit implies the byte is "accepted". P8 vector hardware + has extremely efficient hardware for selecting bits from a mask. + + One might ask "why not use bpermd for short strings"? It is + so slow that its performance about matches the generic PPC64 + variant without any fancy masking, with the added expense of + making the mask. That was the first variant of this. */ + + + +#include "sysdep.h" + +#ifndef USE_AS_STRCSPN +# define USE_AS_STRCSPN 0 +# ifndef STRSPN +# define STRSPN strspn +# endif +# define INITIAL_MASK 0 +# define UPDATE_MASK(RA, RS, RB) or RA, RS, RB +#else +# ifndef STRSPN +# define STRSPN strcspn +# endif +# define INITIAL_MASK -1 +# define UPDATE_MASK(RA, RS, RB) andc RA, RS, RB +#endif + +/* Simple macro to use VSX instructions in overlapping VR's. */ +#define XXVR(insn, vrt, vra, vrb) \ + insn 32+vrt, 32+vra, 32+vrb + +/* ISA 2.07B instructions are not all defined for older binutils. + Macros are defined below for these newer instructions in order + to maintain compatibility. */ + +/* Note, TX/SX is always set as VMX regs are the high 32 VSX regs. */ +#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) + +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + + /* This can be updated to power8 once the minimum version of + binutils supports power8 and the above instructions. */ + .machine power7 +EALIGN(STRSPN, 4, 0) + CALL_MCOUNT 2 + + /* Generate useful constants for later on. */ + vspltisb v1, 7 + vspltisb v2, -1 + vslb v1, v1, v1 /* 0x80 to swap high bit for vbpermq. */ + vspltisb v10, 0 + vsldoi v4, v10, v2, 2 /* 0xFFFF into vr4. */ + XXVR(xxmrgld, v4, v4, v10) /* Mask for checking matches. */ + + /* Prepare to compute 256b mask. */ + addi r4, r4, -1 + li r5, INITIAL_MASK + li r6, INITIAL_MASK + li r7, INITIAL_MASK + li r8, INITIAL_MASK + +#if USE_AS_STRCSPN + /* Ensure the null character never matches by clearing ISA bit 0 in + in r5 which is the bit which will check for it in the later usage + of vbpermq. */ + srdi r5, r5, 1 +#endif + + li r11, 1 + sldi r11, r11, 63 + + /* Start interleaved Mask computation. + This will eventually or 1's into ignored bits from vbpermq. */ + lvsr v11, 0, r3 + vspltb v11, v11, 0 /* Splat shift constant. */ + + /* Build a 256b mask in r5-r8. */ + .align 4 +L(next_needle): + lbzu r9, 1(r4) + + cmpldi cr0, r9, 0 + cmpldi cr1, r9, 128 + + /* This is a little tricky. srd only uses the first 7 bits, + and if bit 7 is set, value is always 0. So, we can + effectively shift 128b in this case. */ + xori r12, r9, 0x40 /* Invert bit 6. */ + srd r10, r11, r9 /* Mask for bits 0-63. */ + srd r12, r11, r12 /* Mask for bits 64-127. */ + + beq cr0, L(start_cmp) + + /* Now, or the value into the correct GPR. */ + bge cr1,L(needle_gt128) + UPDATE_MASK (r5, r5, r10) /* 0 - 63. */ + UPDATE_MASK (r6, r6, r12) /* 64 - 127. */ + b L(next_needle) + + .align 4 +L(needle_gt128): + UPDATE_MASK (r7, r7, r10) /* 128 - 191. */ + UPDATE_MASK (r8, r8, r12) /* 192 - 255. */ + b L(next_needle) + + + .align 4 +L(start_cmp): + /* Move and merge bitmap into 2 VRs. bpermd is slower on P8. */ + mr r0, r3 /* Save r3 for final length computation. */ + MTVRD (v5, r5) + MTVRD (v6, r6) + MTVRD (v7, r7) + MTVRD (v8, r8) + + /* Continue interleaved mask generation. */ +#ifdef __LITTLE_ENDIAN__ + vsrw v11, v2, v11 /* Note, shift ignores higher order bits. */ + vsplth v11, v11, 0 /* Only care about the high 16 bits of v10. */ +#else + vslw v11, v2, v11 /* Note, shift ignores higher order bits. */ + vsplth v11, v11, 1 /* Only care about the low 16 bits of v10. */ +#endif + lvx v0, 0, r3 /* Note, unaligned load ignores lower bits. */ + + /* Do the merging of the bitmask. */ + XXVR(xxmrghd, v5, v5, v6) + XXVR(xxmrghd, v6, v7, v8) + + /* Finish mask generation. */ + vand v11, v11, v4 /* Throwaway bits not in the mask. */ + + /* Compare the first 1-16B, while masking unwanted bytes. */ + clrrdi r3, r3, 4 /* Note, counts from qw boundaries. */ + vxor v9, v0, v1 /* Swap high bit. */ + VBPERMQ (v8, v5, v0) + VBPERMQ (v7, v6, v9) + vor v7, v7, v8 + vor v7, v7, v11 /* Ignore non-participating bytes. */ + vcmpequh. v8, v7, v4 + bnl cr6, L(done) + + addi r3, r3, 16 + + .align 4 +L(vec): + lvx v0, 0, r3 + addi r3, r3, 16 + vxor v9, v0, v1 /* Swap high bit. */ + VBPERMQ (v8, v5, v0) + VBPERMQ (v7, v6, v9) + vor v7, v7, v8 + vcmpequh. v8, v7, v4 + blt cr6, L(vec) + + addi r3, r3, -16 +L(done): + subf r3, r0, r3 + MFVRD (r10, v7) + +#ifdef __LITTLE_ENDIAN__ + addi r0, r10, 1 /* Count the trailing 1's. */ + andc r10, r10, r0 + popcntd r10, r10 +#else + xori r10, r10, 0xffff /* Count leading 1's by inverting. */ + addi r3, r3, -48 /* Account for the extra leading zeros. */ + cntlzd r10, r10 +#endif + + add r3, r3, r10 + blr + +END(STRSPN) +libc_hidden_builtin_def (STRSPN) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/Implies new file mode 100644 index 0000000000..fad2505ab9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power8/fpu +powerpc/powerpc64/power8 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/Implies new file mode 100644 index 0000000000..ae0dbaf857 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies new file mode 100644 index 0000000000..f11e1bdba2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/multiarch/Implies new file mode 100644 index 0000000000..dd6bca4b36 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strcmp.S new file mode 100644 index 0000000000..2dc4f6c722 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strcmp.S @@ -0,0 +1,268 @@ +/* Optimized strcmp implementation for PowerPC64/POWER9. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#ifdef __LITTLE_ENDIAN__ +#include <sysdep.h> + +#ifndef STRCMP +# define STRCMP strcmp +#endif + +/* Implements the function + + int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) + + The implementation uses unaligned doubleword access for first 32 bytes + as in POWER8 patch and uses vectorised loops after that. */ + +/* TODO: Change this to actual instructions when minimum binutils is upgraded + to 2.27. Macros are defined below for these newer instructions in order + to maintain compatibility. */ +# define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) + +# define VEXTUBRX(t,a,b) .long (0x1000070d \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +# define VCMPNEZB(t,a,b) .long (0x10000507 \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +/* Get 16 bytes for unaligned case. + reg1: Vector to hold next 16 bytes. + reg2: Address to read from. + reg3: Permute control vector. */ +# define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vperm v8, v2, reg1, reg3; \ + vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, v9, reg1, reg3; + +/* TODO: change this to .machine power9 when the minimum required binutils + allows it. */ + + .machine power7 +EALIGN (STRCMP, 4, 0) + li r0, 0 + + /* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using + the code: + + (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) + + with PAGE_SIZE being 4096 and ITER_SIZE begin 16. */ + + rldicl r7, r3, 0, 52 + rldicl r9, r4, 0, 52 + cmpldi cr7, r7, 4096-16 + bgt cr7, L(pagecross_check) + cmpldi cr5, r9, 4096-16 + bgt cr5, L(pagecross_check) + + /* For short strings up to 16 bytes, load both s1 and s2 using + unaligned dwords and compare. */ + ld r8, 0(r3) + ld r10, 0(r4) + cmpb r12, r8, r0 + cmpb r11, r8, r10 + orc. r9, r12, r11 + bne cr0, L(different_nocmpb) + + ld r8, 8(r3) + ld r10, 8(r4) + cmpb r12, r8, r0 + cmpb r11, r8, r10 + orc. r9, r12, r11 + bne cr0, L(different_nocmpb) + + addi r7, r3, 16 + addi r4, r4, 16 + +L(align): + /* Now it has checked for first 16 bytes. */ + vspltisb v0, 0 + vspltisb v2, -1 + lvsr v6, 0, r4 /* Compute mask. */ + or r5, r4, r7 + andi. r5, r5, 0xF + beq cr0, L(aligned) + andi. r5, r7, 0xF + beq cr0, L(s1_align) + lvsr v10, 0, r7 /* Compute mask. */ + + /* Both s1 and s2 are unaligned. */ + GET16BYTES(v4, r7, v10) + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + beq cr6, L(match) + b L(different) + + /* Align s1 to qw and adjust s2 address. */ + .align 4 +L(match): + clrldi r6, r7, 60 + subfic r5, r6, 16 + add r7, r7, r5 + add r4, r4, r5 + andi. r5, r4, 0xF + beq cr0, L(aligned) + lvsr v6, 0, r4 + /* There are 2 loops depending on the input alignment. + Each loop gets 16 bytes from s1 and s2 and compares. + Loop until a mismatch or null occurs. */ +L(s1_align): + lvx v4, r7, r0 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, r7, r0 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, r7, r0 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, r7, r0 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + beq cr6, L(s1_align) + b L(different) + + .align 4 +L(aligned): + lvx v4, 0, r7 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, 0, r7 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, 0, r7 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, 0, r7 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + beq cr6, L(aligned) + + /* Calculate and return the difference. */ +L(different): + VCTZLSBB(r6, v7) + VEXTUBRX(r5, r6, v4) + VEXTUBRX(r4, r6, v5) + subf r3, r4, r5 + extsw r3, r3 + blr + + .align 4 +L(different_nocmpb): + neg r3, r9 + and r9, r9, r3 + cntlzd r9, r9 + subfic r9, r9, 63 + srd r3, r8, r9 + srd r10, r10, r9 + rldicl r10, r10, 0, 56 + rldicl r3, r3, 0, 56 + subf r3, r10, r3 + extsw r3, r3 + blr + + .align 4 +L(pagecross_check): + subfic r9, r9, 4096 + subfic r7, r7, 4096 + cmpld cr7, r7, r9 + bge cr7, L(pagecross) + mr r7, r9 + + /* If unaligned 16 bytes reads across a 4K page boundary, it uses + a simple byte a byte comparison until the page alignment for s1 + is reached. */ +L(pagecross): + add r7, r3, r7 + subf r9, r3, r7 + mtctr r9 + + .align 4 +L(pagecross_loop): + /* Loads a byte from s1 and s2, compare if *s1 is equal to *s2 + and if *s1 is '\0'. */ + lbz r9, 0(r3) + lbz r10, 0(r4) + addi r3, r3, 1 + addi r4, r4, 1 + cmplw cr7, r9, r10 + cmpdi cr5, r9, r0 + bne cr7, L(pagecross_ne) + beq cr5, L(pagecross_nullfound) + bdnz L(pagecross_loop) + b L(align) + + .align 4 +L(pagecross_ne): + extsw r3, r9 + mr r9, r10 +L(pagecross_retdiff): + subf r9, r9, r3 + extsw r3, r9 + blr + + .align 4 +L(pagecross_nullfound): + li r3, 0 + b L(pagecross_retdiff) +END (STRCMP) +libc_hidden_builtin_def (strcmp) +#else +#include <sysdeps/powerpc/powerpc64/power8/strcmp.S> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strncmp.S new file mode 100644 index 0000000000..c946a5c638 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strncmp.S @@ -0,0 +1,379 @@ +/* Optimized strncmp implementation for PowerPC64/POWER9. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#ifdef __LITTLE_ENDIAN__ +#include <sysdep.h> + +/* Implements the function + + int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n) + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment for first 32 bytes and uses + vectorised loops after that. */ + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +/* TODO: Change this to actual instructions when minimum binutils is upgraded + to 2.27. Macros are defined below for these newer instructions in order + to maintain compatibility. */ +# define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) + +# define VEXTUBRX(t,a,b) .long (0x1000070d \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +# define VCMPNEZB(t,a,b) .long (0x10000507 \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +/* Get 16 bytes for unaligned case. + reg1: Vector to hold next 16 bytes. + reg2: Address to read from. + reg3: Permute control vector. */ +# define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vperm v8, v2, reg1, reg3; \ + vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + cmplw cr6, r5, r11; \ + ble cr6, 2f; \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, v9, reg1, reg3; + +/* TODO: change this to .machine power9 when minimum binutils + is upgraded to 2.27. */ + .machine power7 +EALIGN (STRNCMP, 4, 0) + /* Check if size is 0. */ + cmpdi cr0, r5, 0 + beq cr0, L(ret0) + li r0, 0 + + /* Check if [s1]+32 or [s2]+32 will cross a 4K page boundary using + the code: + + (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) + + with PAGE_SIZE being 4096 and ITER_SIZE begin 32. */ + rldicl r8, r3, 0, 52 + cmpldi cr7, r8, 4096-32 + bgt cr7, L(pagecross) + rldicl r9, r4, 0, 52 + cmpldi cr7, r9, 4096-32 + bgt cr7, L(pagecross) + + /* For short strings up to 32 bytes, load both s1 and s2 using + unaligned dwords and compare. */ + + ld r7, 0(r3) + ld r9, 0(r4) + li r8, 0 + cmpb r8, r7, r8 + cmpb r6, r7, r9 + orc. r8, r8, r6 + bne cr0, L(different1) + + /* If the strings compared are equal, but size is less or equal + to 8, return 0. */ + cmpldi cr7, r5, 8 + li r9, 0 + ble cr7, L(ret1) + addi r5, r5, -8 + + ld r7, 8(r3) + ld r9, 8(r4) + cmpb r8, r7, r8 + cmpb r6, r7, r9 + orc. r8, r8, r6 + bne cr0, L(different1) + cmpldi cr7, r5, 8 + mr r9, r8 + ble cr7, L(ret1) + /* Update pointers and size. */ + addi r5, r5, -8 + addi r3, r3, 16 + addi r4, r4, 16 + + ld r7, 0(r3) + ld r9, 0(r4) + li r8, 0 + cmpb r8, r7, r8 + cmpb r6, r7, r9 + orc. r8, r8, r6 + bne cr0, L(different1) + cmpldi cr7, r5, 8 + li r9, 0 + ble cr7, L(ret1) + addi r5, r5, -8 + + ld r7, 8(r3) + ld r9, 8(r4) + cmpb r8, r7, r8 + cmpb r6, r7, r9 + orc. r8, r8, r6 + bne cr0, L(different1) + cmpldi cr7, r5, 8 + mr r9, r8 + ble cr7, L(ret1) + + /* Update pointers and size. */ + addi r5, r5, -8 + addi r3, r3, 16 + addi r4, r4, 16 +L(align): + /* Now it has checked for first 32 bytes, align source1 to doubleword + and adjust source2 address. */ + vspltisb v0, 0 + vspltisb v2, -1 + or r6, r4, r3 + andi. r6, r6, 0xF + beq cr0, L(aligned) + lvsr v6, 0, r4 /* Compute mask. */ + clrldi r6, r4, 60 + subfic r11, r6, 16 + andi. r6, r3, 0xF + beq cr0, L(s1_align) + /* Both s1 and s2 are unaligned. */ + GET16BYTES(v5, r4, v6) + lvsr v10, 0, r3 /* Compute mask. */ + clrldi r6, r3, 60 + subfic r11, r6, 16 + GET16BYTES(v4, r3, v10) + VCMPNEZB(v7, v5, v4) + beq cr6, L(match) + b L(different) + + /* Align s1 to qw and adjust s2 address. */ + .align 4 +L(match): + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + subf r5, r11, r5 + add r3, r3, r11 + add r4, r4, r11 + andi. r11, r4, 0xF + beq cr0, L(aligned) + lvsr v6, 0, r4 + clrldi r6, r4, 60 + subfic r11, r6, 16 + /* There are 2 loops depending on the input alignment. + Each loop gets 16 bytes from s1 and s2, checks for null + and compares them. Loops until a mismatch or null occurs. */ +L(s1_align): + lvx v4, 0, r3 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + b L(s1_align) + .align 4 +L(aligned): + lvx v4, 0, r3 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + b L(aligned) + /* Calculate and return the difference. */ +L(different): + VCTZLSBB(r6, v7) + cmplw cr7, r5, r6 + ble cr7, L(ret0) + VEXTUBRX(r5, r6, v4) + VEXTUBRX(r4, r6, v5) + subf r3, r4, r5 + extsw r3, r3 + blr + + .align 4 +L(ret0): + li r9, 0 +L(ret1): + mr r3, r9 + blr + + /* The code now checks if r8 and r5 are different by issuing a + cmpb and shifts the result based on its output: + + leadzero = (__builtin_ffsl (z1) - 1); + leadzero = leadzero > (n-1)*8 ? (n-1)*8 : leadzero; + r1 = (r1 >> leadzero) & 0xFFUL; + r2 = (r2 >> leadzero) & 0xFFUL; + return r1 - r2; */ + + .align 4 +L(different1): + neg r11, r8 + sldi r5, r5, 3 + and r8, r11, r8 + addi r5, r5, -8 + cntlzd r8, r8 + subfic r8, r8, 63 + extsw r8, r8 + cmpld cr7, r8, r5 + ble cr7, L(different2) + mr r8, r5 +L(different2): + extsw r8, r8 + srd r7, r7, r8 + srd r9, r9, r8 + rldicl r3, r7, 0, 56 + rldicl r9, r9, 0, 56 + subf r9, r9, 3 + extsw r9, r9 + mr r3, r9 + blr + + /* If unaligned 16 bytes reads across a 4K page boundary, it uses + a simple byte a byte comparison until the page alignment for s1 + is reached. */ + .align 4 +L(pagecross): + lbz r7, 0(r3) + lbz r9, 0(r4) + subfic r8, r8,4095 + cmplw cr7, r9, r7 + bne cr7, L(byte_ne_3) + cmpdi cr7, r9, 0 + beq cr7, L(byte_ne_0) + addi r5, r5, -1 + subf r7, r8, r5 + subf r9, r7, r5 + addi r9, r9, 1 + mtctr r9 + b L(pagecross_loop1) + + .align 4 +L(pagecross_loop0): + beq cr7, L(ret0) + lbz r9, 0(r3) + lbz r8, 0(r4) + addi r5, r5, -1 + cmplw cr7, r9, r8 + cmpdi cr5, r9, 0 + bne cr7, L(byte_ne_2) + beq cr5, L(byte_ne_0) +L(pagecross_loop1): + cmpdi cr7, r5, 0 + addi r3, r3, 1 + addi r4, r4, 1 + bdnz L(pagecross_loop0) + cmpdi cr7, r7, 0 + li r9, 0 + bne+ cr7, L(align) + b L(ret1) + + .align 4 +L(byte_ne_0): + li r7, 0 +L(byte_ne_1): + subf r9, r9, r7 + extsw r9, r9 + b L(ret1) + + .align 4 +L(byte_ne_2): + extsw r7, r9 + mr r9, r8 + b L(byte_ne_1) +L(byte_ne_3): + extsw r7, r7 + b L(byte_ne_1) +END(STRNCMP) +libc_hidden_builtin_def(strncmp) +#else +#include <sysdeps/powerpc/powerpc64/power8/strncmp.S> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/ppc-mcount.S b/REORG.TODO/sysdeps/powerpc/powerpc64/ppc-mcount.S new file mode 100644 index 0000000000..8312f46644 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/ppc-mcount.S @@ -0,0 +1,39 @@ +/* PowerPC64-specific implementation of profiling support. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +/* We don't need to save the parameter-passing registers as gcc takes + care of that for us. Thus this function looks fairly normal. + In fact, the generic code would work for us. */ + +ENTRY(_mcount) + mflr r4 + ld r11, 0(r1) + stdu r1,-FRAME_MIN_SIZE(r1) + cfi_adjust_cfa_offset (FRAME_MIN_SIZE) + std r4, FRAME_MIN_SIZE+FRAME_LR_SAVE(r1) + cfi_offset (lr, FRAME_LR_SAVE) + ld r3, FRAME_LR_SAVE(r11) + bl JUMPTARGET(__mcount_internal) + nop + ld r0, FRAME_MIN_SIZE+FRAME_LR_SAVE(r1) + mtlr r0 + addi r1,r1,FRAME_MIN_SIZE + blr +END(_mcount) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/register-dump.h b/REORG.TODO/sysdeps/powerpc/powerpc64/register-dump.h new file mode 100644 index 0000000000..215e42b63f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/register-dump.h @@ -0,0 +1,124 @@ +/* Dump registers. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sys/uio.h> +#include <_itoa.h> + +/* This prints out the information in the following form: */ +static const char dumpform[] = "\ +Register dump:\n\ +sr0=000000000000020% sr1=000000000000021% dar=000000000000029% dsi=000002a%\n\ +lr=000000000000024% ctr=000000000000023% gr3*=000000000000022% trap=0000028%\n\ +ccr=0000026% xer=0000025%\n\ +gr0-3: 000000000000000% 000000000000001% 000000000000002% 000000000000003%\n\ +gr4-7: 000000000000004% 000000000000005% 000000000000006% 000000000000007%\n\ +gr8-11: 000000000000008% 000000000000009% 00000000000000a% 00000000000000b%\n\ +gr12-15: 00000000000000c% 00000000000000d% 00000000000000e% 00000000000000f%\n\ +gr16-19: 000000000000010% 000000000000011% 000000000000012% 000000000000013%\n\ +gr20-23: 000000000000014% 000000000000015% 000000000000016% 000000000000017%\n\ +gr24-27: 000000000000018% 000000000000019% 00000000000001a% 00000000000001b%\n\ +gr28-31: 00000000000001c% 00000000000001d% 00000000000001e% 00000000000001f%\n\ +fscr=000000000000050%\n\ +fp0-3: 000000000000030% 000000000000031% 000000000000032% 000000000000033%\n\ +fp4-7: 000000000000034% 000000000000035% 000000000000036% 000000000000037%\n\ +fp8-11: 000000000000038% 000000000000038% 00000000000003a% 00000000000003b%\n\ +fp12-15: 00000000000003c% 00000000000003d% 00000000000003e% 00000000000003f%\n\ +fp16-19: 000000000000040% 000000000000041% 000000000000042% 000000000000043%\n\ +fp20-23: 000000000000044% 000000000000045% 000000000000046% 000000000000047%\n\ +fp24-27: 000000000000048% 000000000000049% 00000000000004a% 00000000000004b%\n\ +fp28-31: 00000000000004c% 00000000000004d% 00000000000004e% 00000000000004f%\n\ +"; + +/* Most of the fields are self-explanatory. 'sr0' is the next + instruction to execute, from SRR0, which may have some relationship + with the instruction that caused the exception. 'r3*' is the value + that will be returned in register 3 when the current system call + returns. 'sr1' is SRR1, bits 16-31 of which are copied from the MSR: + + 16 - External interrupt enable + 17 - Privilege level (1=user, 0=supervisor) + 18 - FP available + 19 - Machine check enable (if clear, processor locks up on machine check) + 20 - FP exception mode bit 0 (FP exceptions recoverable) + 21 - Single-step trace enable + 22 - Branch trace enable + 23 - FP exception mode bit 1 + 25 - exception prefix (if set, exceptions are taken from 0xFFFnnnnn, + otherwise from 0x000nnnnn). + 26 - Instruction address translation enabled. + 27 - Data address translation enabled. + 30 - Exception is recoverable (otherwise, don't try to return). + 31 - Little-endian mode enable. + + 'Trap' is the address of the exception: + + 00200 - Machine check exception (memory parity error, for instance) + 00300 - Data access exception (memory not mapped, see dsisr for why) + 00400 - Instruction access exception (memory not mapped) + 00500 - External interrupt + 00600 - Alignment exception (see dsisr for more information) + 00700 - Program exception (illegal/trap instruction, FP exception) + 00800 - FP unavailable (should not be seen by user code) + 00900 - Decrementer exception (for instance, SIGALRM) + 00A00 - I/O controller interface exception + 00C00 - System call exception (for instance, kill(3)). + 00E00 - FP assist exception (optional FP instructions, etc.) + + 'dar' is the memory location, for traps 00300, 00400, 00600, 00A00. + 'dsisr' has the following bits under trap 00300: + 0 - direct-store error exception + 1 - no page table entry for page + 4 - memory access not permitted + 5 - trying to access I/O controller space or using lwarx/stwcx on + non-write-cached memory + 6 - access was store + 9 - data access breakpoint hit + 10 - segment table search failed to find translation (64-bit ppcs only) + 11 - I/O controller instruction not permitted + For trap 00400, the same bits are set in SRR1 instead. + For trap 00600, bits 12-31 of the DSISR set to allow emulation of + the instruction without actually having to read it from memory. +*/ + +#define xtoi(x) (x >= 'a' ? x + 10 - 'a' : x - '0') + +static void +register_dump (int fd, struct sigcontext *ctx) +{ + char buffer[sizeof(dumpform)]; + char *bufferpos; + unsigned regno; + unsigned long *regs = (unsigned long *)(ctx->regs); + + memcpy(buffer, dumpform, sizeof(dumpform)); + + /* Generate the output. */ + while ((bufferpos = memchr (buffer, '%', sizeof(dumpform)))) + { + regno = xtoi (bufferpos[-1]) | xtoi (bufferpos[-2]) << 4; + memset (bufferpos-2, '0', 3); + _itoa_word (regs[regno], bufferpos+1, 16, 0); + } + + /* Write the output. */ + write (fd, buffer, sizeof(buffer) - 1); +} + + +#define REGISTER_DUMP \ + register_dump (fd, ctx) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/rtld-memset.c b/REORG.TODO/sysdeps/powerpc/powerpc64/rtld-memset.c new file mode 100644 index 0000000000..f3ed8ad1e7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/rtld-memset.c @@ -0,0 +1,4 @@ +/* PPCA2 has a different cache-line size than the usual 128 bytes. To avoid + using code that assumes cache-line size to be 128 bytes (with dcbz + instructions) we use the generic code instead. */ +#include <string/memset.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp-common.S new file mode 100644 index 0000000000..20f6cf364c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp-common.S @@ -0,0 +1,245 @@ +/* setjmp for PowerPC64. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#ifdef __NO_VMX__ +#include <novmxsetjmp.h> +#else +#include <jmpbuf-offsets.h> +#endif + +#ifndef __NO_VMX__ + .section ".toc","aw" +.LC__dl_hwcap: +# ifdef SHARED +# if IS_IN (rtld) + /* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ + .tc _rtld_local_ro[TC],_rtld_local_ro +# else + .tc _rtld_global_ro[TC],_rtld_global_ro +# endif +# else + .tc _dl_hwcap[TC],_dl_hwcap +# endif + .section ".text" +#endif + + .machine "altivec" +ENTRY (setjmp_symbol) + CALL_MCOUNT 1 + li r4,1 /* Set second argument to 1. */ + b JUMPTARGET (GLUE(__sigsetjmp_symbol,_ent)) +END (setjmp_symbol) + +#if defined SHARED && !IS_IN (rtld) && !defined __NO_VMX__ +/* When called from within libc we need a special version of _setjmp + that saves r2 since the call won't go via a plt call stub. See + bugz #269. __GI__setjmp is used in csu/libc-start.c when + HAVE_CLEANUP_JMP_BUF is defined. */ +ENTRY (__GI__setjmp) + std r2,FRAME_TOC_SAVE(r1) /* Save the callers TOC in the save area. */ + CALL_MCOUNT 1 + li r4,0 /* Set second argument to 0. */ + b JUMPTARGET (GLUE(__sigsetjmp_symbol,_ent)) +END (__GI__setjmp) +#endif + +ENTRY (_setjmp_symbol) + CALL_MCOUNT 1 + li r4,0 /* Set second argument to 0. */ + b JUMPTARGET (GLUE(__sigsetjmp_symbol,_ent)) +END (_setjmp_symbol) +libc_hidden_def (_setjmp_symbol) + +ENTRY (__sigsetjmp_symbol) + CALL_MCOUNT 2 +JUMPTARGET(GLUE(__sigsetjmp_symbol,_ent)): +#ifdef PTR_MANGLE + mr r5, r1 + PTR_MANGLE (r5, r6) + std r5,(JB_GPR1*8)(3) +#else + std r1,(JB_GPR1*8)(3) +#endif + mflr r0 +#if defined SHARED && !IS_IN (rtld) + ld r5,FRAME_TOC_SAVE(r1) /* Retrieve the callers TOC. */ + std r5,(JB_GPR2*8)(3) +#else + std r2,(JB_GPR2*8)(3) +#endif + /* setjmp probe expects longjmp first argument (8@3), second argument + (-4@4), and target address (8@0), respectively. */ + LIBC_PROBE (setjmp, 3, 8@3, -4@4, 8@0) + std r14,((JB_GPRS+0)*8)(3) + stfd fp14,((JB_FPRS+0)*8)(3) +#ifdef PTR_MANGLE + PTR_MANGLE2 (r0, r6) +#endif + std r0,(JB_LR*8)(3) + std r15,((JB_GPRS+1)*8)(3) + stfd fp15,((JB_FPRS+1)*8)(3) + mfcr r0 + std r16,((JB_GPRS+2)*8)(3) + stfd fp16,((JB_FPRS+2)*8)(3) + stw r0,((JB_CR*8)+4)(3) /* 32-bit CR. */ + std r17,((JB_GPRS+3)*8)(3) + stfd fp17,((JB_FPRS+3)*8)(3) + std r18,((JB_GPRS+4)*8)(3) + stfd fp18,((JB_FPRS+4)*8)(3) + std r19,((JB_GPRS+5)*8)(3) + stfd fp19,((JB_FPRS+5)*8)(3) + std r20,((JB_GPRS+6)*8)(3) + stfd fp20,((JB_FPRS+6)*8)(3) + std r21,((JB_GPRS+7)*8)(3) + stfd fp21,((JB_FPRS+7)*8)(3) + std r22,((JB_GPRS+8)*8)(3) + stfd fp22,((JB_FPRS+8)*8)(3) + std r23,((JB_GPRS+9)*8)(3) + stfd fp23,((JB_FPRS+9)*8)(3) + std r24,((JB_GPRS+10)*8)(3) + stfd fp24,((JB_FPRS+10)*8)(3) + std r25,((JB_GPRS+11)*8)(3) + stfd fp25,((JB_FPRS+11)*8)(3) + std r26,((JB_GPRS+12)*8)(3) + stfd fp26,((JB_FPRS+12)*8)(3) + std r27,((JB_GPRS+13)*8)(3) + stfd fp27,((JB_FPRS+13)*8)(3) + std r28,((JB_GPRS+14)*8)(3) + stfd fp28,((JB_FPRS+14)*8)(3) + std r29,((JB_GPRS+15)*8)(3) + stfd fp29,((JB_FPRS+15)*8)(3) + std r30,((JB_GPRS+16)*8)(3) + stfd fp30,((JB_FPRS+16)*8)(3) + std r31,((JB_GPRS+17)*8)(3) + stfd fp31,((JB_FPRS+17)*8)(3) +#ifndef __NO_VMX__ + ld r6,.LC__dl_hwcap@toc(r2) +# ifdef SHARED + /* Load _rtld-global._dl_hwcap. */ + ld r6,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r6) +# else + ld r6,0(r6) /* Load extern _dl_hwcap. */ +# endif + andis. r6,r6,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(no_vmx) + la r5,((JB_VRS)*8)(3) + andi. r6,r5,0xf + mfspr r0,VRSAVE + stw r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */ + addi r6,r5,16 + beq+ L(aligned_save_vmx) + + lvsr v0,0,r5 + lvsl v1,0,r5 + addi r6,r5,-16 + +# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \ + addi addgpr,addgpr,32; \ + vperm tmpvr,prevvr,savevr,shiftvr; \ + stvx tmpvr,0,savegpr + + /* + * We have to be careful not to corrupt the data below v20 and + * above v31. To keep things simple we just rotate both ends in + * the opposite direction to our main permute so we can use + * the common macro. + */ + + /* load and rotate data below v20 */ + lvx v2,0,r5 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v20,v2,v0,v3,r5,r6) + save_misaligned_vmx(v21,v20,v0,v3,r6,r5) + save_misaligned_vmx(v22,v21,v0,v3,r5,r6) + save_misaligned_vmx(v23,v22,v0,v3,r6,r5) + save_misaligned_vmx(v24,v23,v0,v3,r5,r6) + save_misaligned_vmx(v25,v24,v0,v3,r6,r5) + save_misaligned_vmx(v26,v25,v0,v3,r5,r6) + save_misaligned_vmx(v27,v26,v0,v3,r6,r5) + save_misaligned_vmx(v28,v27,v0,v3,r5,r6) + save_misaligned_vmx(v29,v28,v0,v3,r6,r5) + save_misaligned_vmx(v30,v29,v0,v3,r5,r6) + save_misaligned_vmx(v31,v30,v0,v3,r6,r5) + /* load and rotate data above v31 */ + lvx v2,0,r6 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v2,v31,v0,v3,r5,r6) + + b L(no_vmx) + +L(aligned_save_vmx): + stvx 20,0,r5 + addi r5,r5,32 + stvx 21,0,r6 + addi r6,r6,32 + stvx 22,0,r5 + addi r5,r5,32 + stvx 23,0,r6 + addi r6,r6,32 + stvx 24,0,r5 + addi r5,r5,32 + stvx 25,0,r6 + addi r6,r6,32 + stvx 26,0,r5 + addi r5,r5,32 + stvx 27,0,r6 + addi r6,r6,32 + stvx 28,0,r5 + addi r5,r5,32 + stvx 29,0,r6 + addi r6,r6,32 + stvx 30,0,r5 + stvx 31,0,r6 +L(no_vmx): +#else + li r6,0 +#endif +#if IS_IN (rtld) + li r3,0 + blr +#elif defined SHARED + b JUMPTARGET (__sigjmp_save_symbol) +#else + mflr r0 + std r0,FRAME_LR_SAVE(r1) + stdu r1,-FRAME_MIN_SIZE(r1) + cfi_adjust_cfa_offset(FRAME_MIN_SIZE) + cfi_offset(lr,FRAME_LR_SAVE) + bl JUMPTARGET (__sigjmp_save_symbol) + nop + ld r0,FRAME_MIN_SIZE+FRAME_LR_SAVE(r1) + addi r1,r1,FRAME_MIN_SIZE + mtlr r0 + blr +#endif +END (__sigsetjmp_symbol) + +#if defined SHARED && !IS_IN (rtld) && !defined __NO_VMX__ +/* When called from within libc we need a special version of __sigsetjmp + that saves r2 since the call won't go via a plt call stub. See + bugz #269. */ +ENTRY (__GI___sigsetjmp) + std r2,FRAME_TOC_SAVE(r1) /* Save the callers TOC in the save area. */ + CALL_MCOUNT 1 + b JUMPTARGET (GLUE(__sigsetjmp_symbol,_ent)) +END (__GI___sigsetjmp) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp.S new file mode 100644 index 0000000000..3f61d28203 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp.S @@ -0,0 +1,61 @@ +/* AltiVec (new) version of setjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libc-symbols.h> +#include <rtld-global-offsets.h> +#include <shlib-compat.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +# define setjmp_symbol setjmp +# define _setjmp_symbol _setjmp +# define __sigsetjmp_symbol __sigsetjmp +# define __sigjmp_save_symbol __sigjmp_save +# include "setjmp-common.S" + +#else /* IS_IN (libc) */ +/* Build a versioned object for libc. */ +versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4) +versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4) +versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4) +# define setjmp_symbol __vmxsetjmp +# define _setjmp_symbol __vmx_setjmp +# define __sigsetjmp_symbol __vmx__sigsetjmp +# define __sigjmp_save_symbol __vmx__sigjmp_save +# include "setjmp-common.S" +strong_alias (__vmxsetjmp, __vmx__setjmp) +strong_alias (__vmx__sigsetjmp, __setjmp) + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_3, GLIBC_2_3_4) +# undef setjmp_symbol +# undef _setjmp_symbol +# undef __sigsetjmp_symbol +# undef __sigjmp_save_symbol +# undef JB_SIZE +# define __NO_VMX__ +compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_3) +compat_symbol (libc, __novmx_setjmp,_setjmp, GLIBC_2_3); +compat_symbol (libc, __novmx__sigsetjmp,__sigsetjmp, GLIBC_2_3) +# define setjmp_symbol __novmxsetjmp +# define _setjmp_symbol __novmx_setjmp +# define __sigsetjmp_symbol __novmx__sigsetjmp +# define __sigjmp_save_symbol __novmx__sigjmp_save +# include "setjmp-common.S" +strong_alias (__novmxsetjmp, __novmx__setjmp) +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/stackguard-macros.h b/REORG.TODO/sysdeps/powerpc/powerpc64/stackguard-macros.h new file mode 100644 index 0000000000..e80a683e64 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/stackguard-macros.h @@ -0,0 +1,14 @@ +#include <stdint.h> + +#define STACK_CHK_GUARD \ + ({ uintptr_t x; asm ("ld %0,-28688(13)" : "=r" (x)); x; }) + +#define POINTER_CHK_GUARD \ + ({ \ + uintptr_t x; \ + asm ("ld %0,%1(13)" \ + : "=r" (x) \ + : "i" (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) \ + ); \ + x; \ + }) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/start.S b/REORG.TODO/sysdeps/powerpc/powerpc64/start.S new file mode 100644 index 0000000000..937c39a740 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/start.S @@ -0,0 +1,92 @@ +/* Startup code for programs linked with GNU libc. PowerPC64 version. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* We do not want .eh_frame info for crt1.o since crt1.o is linked + before crtbegin.o, the file defining __EH_FRAME_BEGIN__. */ +#undef cfi_startproc +#define cfi_startproc +#undef cfi_endproc +#define cfi_endproc + + /* These are the various addresses we require. */ +#ifdef PIC + .section ".data.rel.ro.local","aw" +#else + .section ".rodata" +#endif + .align 3 +L(start_addresses): + .quad 0 /* was _SDA_BASE_ but not in 64-bit ABI*/ +/* function descriptors so don't need JUMPTARGET */ + .quad main + .quad __libc_csu_init + .quad __libc_csu_fini + + ASM_SIZE_DIRECTIVE(L(start_addresses)) + + .section ".toc","aw" +.L01: + .tc L(start_addresses)[TC],L(start_addresses) + .section ".text" +ENTRY(_start) + /* Save the stack pointer, in case we're statically linked under Linux. */ + mr r9,r1 + /* Set up an initial stack frame, and clear the LR. */ + clrrdi r1,r1,4 + li r0,0 + stdu r1,-128(r1) + mtlr r0 + std r0,0(r1) + + /* put the address of start_addresses in r8... ** +** PPC64 ABI uses R13 for thread local, so we leave it alone */ + ld r8,.L01@toc(r2) + + /* and continue in libc-start, in glibc. */ + b JUMPTARGET(__libc_start_main) +/* The linker needs this nop to recognize that it's OK to call via a + TOC adjusting stub. */ + nop + +END(_start) + +/* Define a symbol for the first piece of initialized data. */ + .section ".data" + .globl __data_start +__data_start: + .long 0 +weak_alias (__data_start, data_start) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S new file mode 100644 index 0000000000..cbfcc14cfe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S @@ -0,0 +1,155 @@ +/* Optimized strchr implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how this works. */ + +/* char * [r3] strchr (const char *s [r3] , int c [r4] ) */ + +#ifndef STRCHR +# define STRCHR strchr +#endif + +ENTRY (STRCHR) + CALL_MCOUNT 2 + +#define rTMP1 r0 +#define rRTN r3 /* outgoing result */ +#define rSTR r8 /* current word pointer */ +#define rCHR r4 /* byte we're looking for, spread over the whole word */ +#define rWORD r5 /* the current word */ +#define rCLZB rCHR /* leading zero byte count */ +#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rTMP2 r9 +#define rIGN r10 /* number of bits we should ignore in the first word */ +#define rMASK r11 /* mask with the bits to ignore set to 0 */ +#define rTMP3 r12 +#define rTMP4 rIGN +#define rTMP5 rMASK + + dcbt 0,rRTN + insrdi rCHR, rCHR, 8, 48 + li rMASK, -1 + insrdi rCHR, rCHR, 16, 32 + rlwinm rIGN, rRTN, 3, 26, 28 + insrdi rCHR, rCHR, 32, 0 + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + clrrdi rSTR, rRTN, 3 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + sldi rTMP1, rFEFE, 32 + insrdi r7F7F, r7F7F, 32, 0 + add rFEFE, rFEFE, rTMP1 +/* Test the first (partial?) word. */ + ld rWORD, 0(rSTR) +#ifdef __LITTLE_ENDIAN__ + sld rMASK, rMASK, rIGN +#else + srd rMASK, rMASK, rIGN +#endif + orc rWORD, rWORD, rMASK + add rTMP1, rFEFE, rWORD + nor rTMP2, r7F7F, rWORD + and. rTMP4, rTMP1, rTMP2 + xor rTMP3, rCHR, rWORD + orc rTMP3, rTMP3, rMASK + b L(loopentry) + +/* The loop. */ + +L(loop): + ldu rWORD, 8(rSTR) + and. rTMP5, rTMP1, rTMP2 +/* Test for 0. */ + add rTMP1, rFEFE, rWORD /* x - 0x01010101. */ + nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */ + bne L(foundit) + and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */ +/* Start test for the bytes we're looking for. */ + xor rTMP3, rCHR, rWORD +L(loopentry): + add rTMP1, rFEFE, rTMP3 + nor rTMP2, r7F7F, rTMP3 + beq L(loop) + +/* There is a zero byte in the word, but may also be a matching byte (either + before or after the zero byte). In fact, we may be looking for a + zero byte, in which case we return a match. */ + and. rTMP5, rTMP1, rTMP2 + li rRTN, 0 + beqlr +/* At this point: + rTMP5 bytes are 0x80 for each match of c, 0 otherwise. + rTMP4 bytes are 0x80 for each match of 0, 0 otherwise. + But there may be false matches in the next most significant byte from + a true match due to carries. This means we need to recalculate the + matches using a longer method for big-endian. */ +#ifdef __LITTLE_ENDIAN__ + addi rTMP1, rTMP5, -1 + andc rTMP1, rTMP1, rTMP5 + cntlzd rCLZB, rTMP1 + addi rTMP2, rTMP4, -1 + andc rTMP2, rTMP2, rTMP4 + cmpld rTMP1, rTMP2 + bgtlr + subfic rCLZB, rCLZB, 64-7 +#else +/* I think we could reduce this by two instructions by keeping the "nor" + results from the loop for reuse here. See strlen.S tail. Similarly + one instruction could be pruned from L(foundit). */ + and rFEFE, r7F7F, rWORD + or rTMP5, r7F7F, rWORD + and rTMP1, r7F7F, rTMP3 + or rTMP4, r7F7F, rTMP3 + add rFEFE, rFEFE, r7F7F + add rTMP1, rTMP1, r7F7F + nor rWORD, rTMP5, rFEFE + nor rTMP2, rTMP4, rTMP1 + cntlzd rCLZB, rTMP2 + cmpld rWORD, rTMP2 + bgtlr +#endif + srdi rCLZB, rCLZB, 3 + add rRTN, rSTR, rCLZB + blr + +L(foundit): +#ifdef __LITTLE_ENDIAN__ + addi rTMP1, rTMP5, -1 + andc rTMP1, rTMP1, rTMP5 + cntlzd rCLZB, rTMP1 + subfic rCLZB, rCLZB, 64-7-64 + sradi rCLZB, rCLZB, 3 +#else + and rTMP1, r7F7F, rTMP3 + or rTMP4, r7F7F, rTMP3 + add rTMP1, rTMP1, r7F7F + nor rTMP2, rTMP4, rTMP1 + cntlzd rCLZB, rTMP2 + subi rSTR, rSTR, 8 + srdi rCLZB, rCLZB, 3 +#endif + add rRTN, rSTR, rCLZB + blr +END (STRCHR) + +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strcmp.S new file mode 100644 index 0000000000..ab5f8c231c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strcmp.S @@ -0,0 +1,180 @@ +/* Optimized strcmp implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */ + +#ifndef STRCMP +# define STRCMP strcmp +#endif + +EALIGN (STRCMP, 4, 0) + CALL_MCOUNT 2 + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rWORD1 r5 /* current word in s1 */ +#define rWORD2 r6 /* current word in s2 */ +#define rFEFE r7 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rBITDIF r10 /* bits that differ in s1 & s2 words */ +#define rTMP r11 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 + dcbt 0,rSTR2 + clrldi. rTMP, rTMP, 61 + lis rFEFE, -0x101 + bne L(unaligned) + + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) + lis r7F7F, 0x7f7f + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + sldi rTMP, rFEFE, 32 + insrdi r7F7F, r7F7F, 32, 0 + add rFEFE, rFEFE, rTMP + b L(g1) + +L(g0): ldu rWORD1, 8(rSTR1) + bne cr1, L(different) + ldu rWORD2, 8(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + and. rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + beq cr1, L(equal) + andc rTMP2, rTMP2, rTMP + rldimi rTMP2, rTMP2, 1, 0 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + cmpd cr1, rWORD1, rWORD2 + beq cr1, L(equal) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzd rBITDIF, rBITDIF + cntlzd rNEG, rNEG + addi rNEG, rNEG, 7 + cmpd cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + blt- cr1, L(equal) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(unaligned): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + b L(u1) + +L(u0): lbzu rWORD1, 1(rSTR1) + bne- L(u4) + lbzu rWORD2, 1(rSTR2) +L(u1): cmpwi cr1, rWORD1, 0 + beq- cr1, L(u3) + cmpd rWORD1, rWORD2 + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + cmpdi cr1, rWORD1, 0 + cmpd rWORD1, rWORD2 + bne+ cr1, L(u0) +L(u3): sub rRTN, rWORD1, rWORD2 + blr +L(u4): lbz rWORD1, -1(rSTR1) + sub rRTN, rWORD1, rWORD2 + blr +END (STRCMP) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strlen.S new file mode 100644 index 0000000000..1466624c6a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strlen.S @@ -0,0 +1,203 @@ +/* Optimized strlen implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* The algorithm here uses the following techniques: + + 1) Given a word 'x', we can test to see if it contains any 0 bytes + by subtracting 0x01010101, and seeing if any of the high bits of each + byte changed from 0 to 1. This works because the least significant + 0 byte must have had no incoming carry (otherwise it's not the least + significant), so it is 0x00 - 0x01 == 0xff. For all other + byte values, either they have the high bit set initially, or when + 1 is subtracted you get a value in the range 0x00-0x7f, none of which + have their high bit set. The expression here is + (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when + there were no 0x00 bytes in the word. You get 0x80 in bytes that + match, but possibly false 0x80 matches in the next more significant + byte to a true match due to carries. For little-endian this is + of no consequence since the least significant match is the one + we're interested in, but big-endian needs method 2 to find which + byte matches. + + 2) Given a word 'x', we can test to see _which_ byte was zero by + calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f). + This produces 0x80 in each byte that was zero, and 0x00 in all + the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each + byte, and the '| x' part ensures that bytes with the high bit set + produce 0x00. The addition will carry into the high bit of each byte + iff that byte had one of its low 7 bits set. We can then just see + which was the most significant bit set and divide by 8 to find how + many to add to the index. + This is from the book 'The PowerPC Compiler Writer's Guide', + by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. + + We deal with strings not aligned to a word boundary by taking the + first word and ensuring that bytes not part of the string + are treated as nonzero. To allow for memory latency, we unroll the + loop a few times, being careful to ensure that we do not read ahead + across cache line boundaries. + + Questions to answer: + 1) How long are strings passed to strlen? If they're often really long, + we should probably use cache management instructions and/or unroll the + loop more. If they're often quite short, it might be better to use + fact (2) in the inner loop than have to recalculate it. + 2) How popular are bytes with the high bit set? If they are very rare, + on some processors it might be useful to use the simpler expression + ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one + ALU), but this fails when any character has its high bit set. + + Answer: + 1) Added a Data Cache Block Touch early to prefetch the first 128 + byte cache line. Adding dcbt instructions to the loop would not be + effective since most strings will be shorter than the cache line. */ + +/* Some notes on register usage: Under the SVR4 ABI, we can use registers + 0 and 3 through 12 (so long as we don't call any procedures) without + saving them. We can also use registers 14 through 31 if we save them. + We can't use r1 (it's the stack pointer), r2 nor r13 because the user + program may expect them to hold their usual value if we get sent + a signal. Integer parameters are passed in r3 through r10. + We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving + them, the others we must save. */ + +/* int [r3] strlen (char *s [r3]) */ + +#ifndef STRLEN +# define STRLEN strlen +#endif + +ENTRY (STRLEN) + CALL_MCOUNT 1 + +#define rTMP4 r0 +#define rRTN r3 /* incoming STR arg, outgoing result */ +#define rSTR r4 /* current string position */ +#define rPADN r5 /* number of padding bits we prepend to the + string to make it start at a word boundary */ +#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rWORD1 r8 /* current string doubleword */ +#define rWORD2 r9 /* next string doubleword */ +#define rMASK r9 /* mask for first string doubleword */ +#define rTMP1 r10 +#define rTMP2 r11 +#define rTMP3 r12 + + dcbt 0,rRTN + clrrdi rSTR, rRTN, 3 + lis r7F7F, 0x7f7f + rlwinm rPADN, rRTN, 3, 26, 28 + ld rWORD1, 0(rSTR) + addi r7F7F, r7F7F, 0x7f7f + li rMASK, -1 + insrdi r7F7F, r7F7F, 32, 0 +/* We use method (2) on the first two doublewords, because rFEFE isn't + required which reduces setup overhead. Also gives a faster return + for small strings on big-endian due to needing to recalculate with + method (2) anyway. */ +#ifdef __LITTLE_ENDIAN__ + sld rMASK, rMASK, rPADN +#else + srd rMASK, rMASK, rPADN +#endif + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + lis rFEFE, -0x101 + add rTMP1, rTMP1, r7F7F + addi rFEFE, rFEFE, -0x101 + nor rTMP3, rTMP2, rTMP1 + and. rTMP3, rTMP3, rMASK + mtcrf 0x01, rRTN + bne L(done0) + sldi rTMP1, rFEFE, 32 + add rFEFE, rFEFE, rTMP1 +/* Are we now aligned to a doubleword boundary? */ + bt 28, L(loop) + +/* Handle second doubleword of pair. */ +/* Perhaps use method (1) here for little-endian, saving one instruction? */ + ldu rWORD1, 8(rSTR) + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + add rTMP1, rTMP1, r7F7F + nor. rTMP3, rTMP2, rTMP1 + bne L(done0) + +/* The loop. */ + +L(loop): + ld rWORD1, 8(rSTR) + ldu rWORD2, 16(rSTR) + add rTMP1, rFEFE, rWORD1 + nor rTMP2, r7F7F, rWORD1 + and. rTMP1, rTMP1, rTMP2 + add rTMP3, rFEFE, rWORD2 + nor rTMP4, r7F7F, rWORD2 + bne L(done1) + and. rTMP3, rTMP3, rTMP4 + beq L(loop) + +#ifndef __LITTLE_ENDIAN__ + and rTMP1, r7F7F, rWORD2 + add rTMP1, rTMP1, r7F7F + andc rTMP3, rTMP4, rTMP1 + b L(done0) + +L(done1): + and rTMP1, r7F7F, rWORD1 + subi rSTR, rSTR, 8 + add rTMP1, rTMP1, r7F7F + andc rTMP3, rTMP2, rTMP1 + +/* When we get to here, rSTR points to the first doubleword in the string that + contains a zero byte, and rTMP3 has 0x80 for bytes that are zero, and 0x00 + otherwise. */ +L(done0): + cntlzd rTMP3, rTMP3 + subf rTMP1, rRTN, rSTR + srdi rTMP3, rTMP3, 3 + add rRTN, rTMP1, rTMP3 + blr +#else + +L(done0): + addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */ + andc rTMP1, rTMP1, rTMP3 + cntlzd rTMP1, rTMP1 /* Count bits not in the mask. */ + subf rTMP3, rRTN, rSTR + subfic rTMP1, rTMP1, 64-7 + srdi rTMP1, rTMP1, 3 + add rRTN, rTMP1, rTMP3 + blr + +L(done1): + addi rTMP3, rTMP1, -1 + andc rTMP3, rTMP3, rTMP1 + cntlzd rTMP3, rTMP3 + subf rTMP1, rRTN, rSTR + subfic rTMP3, rTMP3, 64-7-64 + sradi rTMP3, rTMP3, 3 + add rRTN, rTMP1, rTMP3 + blr +#endif + +END (STRLEN) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strncmp.S new file mode 100644 index 0000000000..076599804a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strncmp.S @@ -0,0 +1,210 @@ +/* Optimized strcmp implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +EALIGN (STRNCMP, 4, 0) + CALL_MCOUNT 3 + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 + lis r7F7F, 0x7f7f + dcbt 0,rSTR2 + clrldi. rTMP, rTMP, 61 + cmpldi cr1, rN, 0 + lis rFEFE, -0x101 + bne L(unaligned) +/* We are doubleword aligned so set up for two loops. first a double word + loop, then fall into the byte loop if any residual. */ + srdi. rTMP, rN, 3 + clrldi rN, rN, 61 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + cmpldi cr1, rN, 0 + beq L(unaligned) + + mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */ + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) + sldi rTMP, rFEFE, 32 + insrdi r7F7F, r7F7F, 32, 0 + add rFEFE, rFEFE, rTMP + b L(g1) + +L(g0): + ldu rWORD1, 8(rSTR1) + bne- cr1, L(different) + ldu rWORD2, 8(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + bdz L(tail) + and. rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + beq cr1, L(equal) + andc rTMP2, rTMP2, rTMP + rldimi rTMP2, rTMP2, 1, 0 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + cmpd cr1, rWORD1, rWORD2 + beq cr1, L(equal) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzd rBITDIF, rBITDIF + cntlzd rNEG, rNEG + addi rNEG, rNEG, 7 + cmpd cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + blt- cr1, L(equal) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + bne- L(endstring) + addi rSTR1, rSTR1, 8 + bne- cr1, L(different) + addi rSTR2, rSTR2, 8 + cmpldi cr1, rN, 0 +L(unaligned): + mtctr rN /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(uz) +L(ux): + li rRTN, 0 + blr + .align 4 +L(uz): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + nop + b L(u1) +L(u0): + lbzu rWORD2, 1(rSTR2) +L(u1): + bdz L(u3) + cmpdi cr1, rWORD1, 0 + cmpd rWORD1, rWORD2 + beq- cr1, L(u3) + lbzu rWORD1, 1(rSTR1) + bne- L(u2) + lbzu rWORD2, 1(rSTR2) + bdz L(u3) + cmpdi cr1, rWORD1, 0 + cmpd rWORD1, rWORD2 + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + bne+ cr1, L(u0) + +L(u2): lbzu rWORD1, -1(rSTR1) +L(u3): sub rRTN, rWORD1, rWORD2 + blr +END (STRNCMP) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/submul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc64/submul_1.S new file mode 100644 index 0000000000..df93b4c3f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/submul_1.S @@ -0,0 +1,21 @@ +/* PowerPC64 __mpn_addmul_1 -- Multiply a limb vector with a limb and subtract + the result to a second limb vector. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_SUBMUL +#include "addmul_1.S" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/sysdep.h b/REORG.TODO/sysdeps/powerpc/powerpc64/sysdep.h new file mode 100644 index 0000000000..db7c1d78b5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/sysdep.h @@ -0,0 +1,425 @@ +/* Assembly macros for 64-bit PowerPC. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/sysdep.h> + +#ifdef __ASSEMBLER__ + +/* Stack frame offsets. */ +#if _CALL_ELF != 2 +#define FRAME_MIN_SIZE 112 +#define FRAME_MIN_SIZE_PARM 112 +#define FRAME_BACKCHAIN 0 +#define FRAME_CR_SAVE 8 +#define FRAME_LR_SAVE 16 +#define FRAME_TOC_SAVE 40 +#define FRAME_PARM_SAVE 48 +#define FRAME_PARM1_SAVE 48 +#define FRAME_PARM2_SAVE 56 +#define FRAME_PARM3_SAVE 64 +#define FRAME_PARM4_SAVE 72 +#define FRAME_PARM5_SAVE 80 +#define FRAME_PARM6_SAVE 88 +#define FRAME_PARM7_SAVE 96 +#define FRAME_PARM8_SAVE 104 +#define FRAME_PARM9_SAVE 112 +#else +#define FRAME_MIN_SIZE 32 +#define FRAME_MIN_SIZE_PARM 96 +#define FRAME_BACKCHAIN 0 +#define FRAME_CR_SAVE 8 +#define FRAME_LR_SAVE 16 +#define FRAME_TOC_SAVE 24 +#define FRAME_PARM_SAVE 32 +#define FRAME_PARM1_SAVE 32 +#define FRAME_PARM2_SAVE 40 +#define FRAME_PARM3_SAVE 48 +#define FRAME_PARM4_SAVE 56 +#define FRAME_PARM5_SAVE 64 +#define FRAME_PARM6_SAVE 72 +#define FRAME_PARM7_SAVE 80 +#define FRAME_PARM8_SAVE 88 +#define FRAME_PARM9_SAVE 96 +#endif + +/* Support macros for CALL_MCOUNT. */ +#if _CALL_ELF == 2 +#define call_mcount_parm_offset (-64) +#else +#define call_mcount_parm_offset FRAME_PARM_SAVE +#endif + .macro SAVE_ARG NARG + .if \NARG + SAVE_ARG \NARG-1 + std 2+\NARG,call_mcount_parm_offset-8+8*(\NARG)(1) + .endif + .endm + + .macro REST_ARG NARG + .if \NARG + REST_ARG \NARG-1 + ld 2+\NARG,FRAME_MIN_SIZE_PARM+call_mcount_parm_offset-8+8*(\NARG)(1) + .endif + .endm + + .macro CFI_SAVE_ARG NARG + .if \NARG + CFI_SAVE_ARG \NARG-1 + cfi_offset(2+\NARG,call_mcount_parm_offset-8+8*(\NARG)) + .endif + .endm + + .macro CFI_REST_ARG NARG + .if \NARG + CFI_REST_ARG \NARG-1 + cfi_restore(2+\NARG) + .endif + .endm + +/* If compiled for profiling, call `_mcount' at the start of each function. + see ppc-mcount.S for more details. */ + .macro CALL_MCOUNT NARG +#ifdef PROF + mflr r0 + SAVE_ARG \NARG + std r0,FRAME_LR_SAVE(r1) + stdu r1,-FRAME_MIN_SIZE_PARM(r1) + cfi_adjust_cfa_offset(FRAME_MIN_SIZE_PARM) + cfi_offset(lr,FRAME_LR_SAVE) + CFI_SAVE_ARG \NARG + bl JUMPTARGET (_mcount) +#ifndef SHARED + nop +#endif + ld r0,FRAME_MIN_SIZE_PARM+FRAME_LR_SAVE(r1) + REST_ARG \NARG + mtlr r0 + addi r1,r1,FRAME_MIN_SIZE_PARM + cfi_adjust_cfa_offset(-FRAME_MIN_SIZE_PARM) + cfi_restore(lr) + CFI_REST_ARG \NARG +#endif + .endm + +#if _CALL_ELF != 2 + +/* Macro to prepare for calling via a function pointer. */ + .macro PPC64_LOAD_FUNCPTR PTR + ld r12,0(\PTR) + ld r2,8(\PTR) + mtctr r12 + ld r11,16(\PTR) + .endm + +#ifdef USE_PPC64_OVERLAPPING_OPD +# define OPD_ENT(name) .quad BODY_LABEL (name), .TOC.@tocbase +#else +# define OPD_ENT(name) .quad BODY_LABEL (name), .TOC.@tocbase, 0 +#endif + +#define ENTRY_1(name) \ + .type BODY_LABEL(name),@function; \ + .globl name; \ + .section ".opd","aw"; \ + .align 3; \ +name##: OPD_ENT (name); \ + .previous; + +#define DOT_LABEL(X) X +#define BODY_LABEL(X) .LY##X +#define ENTRY_2(name) \ + .type name,@function; \ + ENTRY_1(name) +#define END_2(name) \ + .size name,.-BODY_LABEL(name); \ + .size BODY_LABEL(name),.-BODY_LABEL(name); +#define LOCALENTRY(name) + +#else /* _CALL_ELF */ + +/* Macro to prepare for calling via a function pointer. */ + .macro PPC64_LOAD_FUNCPTR PTR + mr r12,\PTR + mtctr r12 + .endm + +#define DOT_LABEL(X) X +#define BODY_LABEL(X) X +#define ENTRY_2(name) \ + .globl name; \ + .type name,@function; +#define END_2(name) \ + .size name,.-name; +#define LOCALENTRY(name) \ +1: addis r2,r12,.TOC.-1b@ha; \ + addi r2,r2,.TOC.-1b@l; \ + .localentry name,.-name; + +#endif /* _CALL_ELF */ + +#define ENTRY(name) \ + .section ".text"; \ + ENTRY_2(name) \ + .align ALIGNARG(2); \ +BODY_LABEL(name): \ + cfi_startproc; \ + LOCALENTRY(name) + +#define EALIGN_W_0 /* No words to insert. */ +#define EALIGN_W_1 nop +#define EALIGN_W_2 nop;nop +#define EALIGN_W_3 nop;nop;nop +#define EALIGN_W_4 EALIGN_W_3;nop +#define EALIGN_W_5 EALIGN_W_4;nop +#define EALIGN_W_6 EALIGN_W_5;nop +#define EALIGN_W_7 EALIGN_W_6;nop + +/* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes + past a 2^alignt boundary. */ +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(name) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ +BODY_LABEL(name): \ + cfi_startproc; \ + LOCALENTRY(name) + +/* Local labels stripped out by the linker. */ +#undef L +#define L(x) .L##x + +#define tostring(s) #s +#define stringify(s) tostring(s) +#define XGLUE(a,b) a##b +#define GLUE(a,b) XGLUE(a,b) +#define LT_LABEL(name) GLUE(.LT,name) +#define LT_LABELSUFFIX(name,suffix) GLUE(GLUE(.LT,name),suffix) + +/* Support Traceback tables */ +#define TB_ASM 0x000c000000000000 +#define TB_GLOBALLINK 0x0000800000000000 +#define TB_IS_EPROL 0x0000400000000000 +#define TB_HAS_TBOFF 0x0000200000000000 +#define TB_INT_PROC 0x0000100000000000 +#define TB_HAS_CTL 0x0000080000000000 +#define TB_TOCLESS 0x0000040000000000 +#define TB_FP_PRESENT 0x0000020000000000 +#define TB_LOG_ABORT 0x0000010000000000 +#define TB_INT_HANDL 0x0000008000000000 +#define TB_NAME_PRESENT 0x0000004000000000 +#define TB_USES_ALLOCA 0x0000002000000000 +#define TB_SAVES_CR 0x0000000200000000 +#define TB_SAVES_LR 0x0000000100000000 +#define TB_STORES_BC 0x0000000080000000 +#define TB_FIXUP 0x0000000040000000 +#define TB_FP_SAVED(fprs) (((fprs) & 0x3f) << 24) +#define TB_GPR_SAVED(gprs) (((fprs) & 0x3f) << 16) +#define TB_FIXEDPARMS(parms) (((parms) & 0xff) << 8) +#define TB_FLOATPARMS(parms) (((parms) & 0x7f) << 1) +#define TB_PARMSONSTK 0x0000000000000001 + +#define PPC_HIGHER(v) (((v) >> 32) & 0xffff) +#define TB_DEFAULT TB_ASM | TB_HAS_TBOFF | TB_NAME_PRESENT + +#define TRACEBACK(name) \ +LT_LABEL(name): ; \ + .long 0 ; \ + .quad TB_DEFAULT ; \ + .long LT_LABEL(name)-BODY_LABEL(name) ; \ + .short LT_LABELSUFFIX(name,_name_end)-LT_LABELSUFFIX(name,_name_start) ; \ +LT_LABELSUFFIX(name,_name_start): ;\ + .ascii stringify(name) ; \ +LT_LABELSUFFIX(name,_name_end): ; \ + .align 2 ; + +#define TRACEBACK_MASK(name,mask) \ +LT_LABEL(name): ; \ + .long 0 ; \ + .quad TB_DEFAULT | mask ; \ + .long LT_LABEL(name)-BODY_LABEL(name) ; \ + .short LT_LABELSUFFIX(name,_name_end)-LT_LABELSUFFIX(name,_name_start) ; \ +LT_LABELSUFFIX(name,_name_start): ;\ + .ascii stringify(name) ; \ +LT_LABELSUFFIX(name,_name_end): ; \ + .align 2 ; + +/* END generates Traceback tables */ +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(name) \ + END_2(name) + +/* This form supports more informative traceback tables */ +#define END_GEN_TB(name,mask) \ + cfi_endproc; \ + TRACEBACK_MASK(name,mask) \ + END_2(name) + +#if !IS_IN(rtld) && defined (ENABLE_LOCK_ELISION) +# define ABORT_TRANSACTION \ + cmpdi 13,0; \ + beq 1f; \ + lwz 0,TM_CAPABLE(13); \ + cmpwi 0,0; \ + beq 1f; \ + li 11,_ABORT_SYSCALL; \ + tabort. 11; \ + .align 4; \ +1: +#else +# define ABORT_TRANSACTION +#endif + +#define DO_CALL(syscall) \ + ABORT_TRANSACTION \ + li 0,syscall; \ + sc + +/* ppc64 is always PIC */ +#undef JUMPTARGET +#define JUMPTARGET(name) DOT_LABEL(name) + +#define PSEUDO(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#ifdef SHARED +#define TAIL_CALL_SYSCALL_ERROR \ + b JUMPTARGET(__syscall_error) +#else +/* Static version might be linked into a large app with a toc exceeding + 64k. We can't put a toc adjusting stub on a plain branch, so can't + tail call __syscall_error. */ +#define TAIL_CALL_SYSCALL_ERROR \ + .ifdef .Local_syscall_error; \ + b .Local_syscall_error; \ + .else; \ +.Local_syscall_error: \ + mflr 0; \ + std 0,FRAME_LR_SAVE(1); \ + stdu 1,-FRAME_MIN_SIZE(1); \ + cfi_adjust_cfa_offset(FRAME_MIN_SIZE); \ + cfi_offset(lr,FRAME_LR_SAVE); \ + bl JUMPTARGET(__syscall_error); \ + nop; \ + ld 0,FRAME_MIN_SIZE+FRAME_LR_SAVE(1); \ + addi 1,1,FRAME_MIN_SIZE; \ + cfi_adjust_cfa_offset(-FRAME_MIN_SIZE); \ + mtlr 0; \ + cfi_restore(lr); \ + blr; \ + .endif +#endif + +#define PSEUDO_RET \ + bnslr+; \ + TAIL_CALL_SYSCALL_ERROR + +#define ret PSEUDO_RET + +#undef PSEUDO_END +#define PSEUDO_END(name) \ + END (name) + +#define PSEUDO_NOERRNO(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#define PSEUDO_RET_NOERRNO \ + blr + +#define ret_NOERRNO PSEUDO_RET_NOERRNO + +#undef PSEUDO_END_NOERRNO +#define PSEUDO_END_NOERRNO(name) \ + END (name) + +#define PSEUDO_ERRVAL(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#define PSEUDO_RET_ERRVAL \ + blr + +#define ret_ERRVAL PSEUDO_RET_ERRVAL + +#undef PSEUDO_END_ERRVAL +#define PSEUDO_END_ERRVAL(name) \ + END (name) + +#else /* !__ASSEMBLER__ */ + +#if _CALL_ELF != 2 + +#define PPC64_LOAD_FUNCPTR(ptr) \ + "ld 12,0(" #ptr ");\n" \ + "ld 2,8(" #ptr ");\n" \ + "mtctr 12;\n" \ + "ld 11,16(" #ptr ");" + +#ifdef USE_PPC64_OVERLAPPING_OPD +# define OPD_ENT(name) ".quad " BODY_PREFIX #name ", .TOC.@tocbase;" +#else +# define OPD_ENT(name) ".quad " BODY_PREFIX #name ", .TOC.@tocbase, 0;" +#endif + +#define ENTRY_1(name) \ + ".type " BODY_PREFIX #name ",@function;\n" \ + ".globl " #name ";\n" \ + ".pushsection \".opd\",\"aw\";\n" \ + ".align 3;\n" \ +#name ":\n" \ + OPD_ENT (name) "\n" \ + ".popsection;" + +#define DOT_PREFIX "" +#define BODY_PREFIX ".LY" +#define ENTRY_2(name) \ + ".type " #name ",@function;\n" \ + ENTRY_1(name) +#define END_2(name) \ + ".size " #name ",.-" BODY_PREFIX #name ";\n" \ + ".size " BODY_PREFIX #name ",.-" BODY_PREFIX #name ";" +#define LOCALENTRY(name) + +#else /* _CALL_ELF */ + +#define PPC64_LOAD_FUNCPTR(ptr) \ + "mr 12," #ptr ";\n" \ + "mtctr 12;" + +#define DOT_PREFIX "" +#define BODY_PREFIX "" +#define ENTRY_2(name) \ + ".type " #name ",@function;\n" \ + ".globl " #name ";" +#define END_2(name) \ + ".size " #name ",.-" #name ";" +#define LOCALENTRY(name) \ + "1: addis 2,12,.TOC.-1b@ha;\n" \ + "addi 2,2,.TOC.-1b@l;\n" \ + ".localentry " #name ",.-" #name ";" + +#endif /* _CALL_ELF */ + +#endif /* __ASSEMBLER__ */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/tls-macros.h b/REORG.TODO/sysdeps/powerpc/powerpc64/tls-macros.h new file mode 100644 index 0000000000..42a95ec5c1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/tls-macros.h @@ -0,0 +1,44 @@ +/* Include sysdeps/powerpc/tls-macros.h for __TLS_CALL_CLOBBERS */ +#include_next "tls-macros.h" + +/* PowerPC64 Local Exec TLS access. */ +#define TLS_LE(x) \ + ({ int * __result; \ + asm ("addis %0,13," #x "@tprel@ha\n\t" \ + "addi %0,%0," #x "@tprel@l" \ + : "=b" (__result) ); \ + __result; \ + }) +/* PowerPC64 Initial Exec TLS access. */ +#define TLS_IE(x) \ + ({ int * __result; \ + asm ("ld %0," #x "@got@tprel(2)\n\t" \ + "add %0,%0," #x "@tls" \ + : "=r" (__result) ); \ + __result; \ + }) + +#define __TLS_GET_ADDR "__tls_get_addr" + +/* PowerPC64 Local Dynamic TLS access. */ +#define TLS_LD(x) \ + ({ int * __result; \ + asm ("addi 3,2," #x "@got@tlsld\n\t" \ + "bl " __TLS_GET_ADDR "\n\t" \ + "nop \n\t" \ + "addis %0,3," #x "@dtprel@ha\n\t" \ + "addi %0,%0," #x "@dtprel@l" \ + : "=b" (__result) : \ + : "3", __TLS_CALL_CLOBBERS); \ + __result; \ + }) +/* PowerPC64 General Dynamic TLS access. */ +#define TLS_GD(x) \ + ({ register int *__result __asm__ ("r3"); \ + asm ("addi 3,2," #x "@got@tlsgd\n\t" \ + "bl " __TLS_GET_ADDR "\n\t" \ + "nop " \ + : "=r" (__result) : \ + : __TLS_CALL_CLOBBERS); \ + __result; \ + }) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/tst-audit.h b/REORG.TODO/sysdeps/powerpc/powerpc64/tst-audit.h new file mode 100644 index 0000000000..b25040b9f0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/tst-audit.h @@ -0,0 +1,33 @@ +/* Definitions for testing PLT entry/exit auditing. PowerPC64 version. + + Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#if _CALL_ELF != 2 +#define pltenter la_ppc64_gnu_pltenter +#define pltexit la_ppc64_gnu_pltexit +#define La_regs La_ppc64_regs +#define La_retval La_ppc64_retval +#define int_retval lrv_r3 +#else +#define pltenter la_ppc64v2_gnu_pltenter +#define pltexit la_ppc64v2_gnu_pltexit +#define La_regs La_ppc64v2_regs +#define La_retval La_ppc64v2_retval +#define int_retval lrv_r3 +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/Implies new file mode 100644 index 0000000000..a105a325f7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/Implies @@ -0,0 +1 @@ +powerpc/powerpc64 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/fpu/Implies new file mode 100644 index 0000000000..c1f617b7da --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/fpu/multiarch/Implies new file mode 100644 index 0000000000..8d6531a174 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/multiarch/Implies new file mode 100644 index 0000000000..30edcf7f9d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/Implies new file mode 100644 index 0000000000..eedef823d5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/fpu/Implies new file mode 100644 index 0000000000..8447198fbc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/fpu/multiarch/Implies new file mode 100644 index 0000000000..7fd86fdf87 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/multiarch/Implies new file mode 100644 index 0000000000..1fc7b7cd39 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/Implies new file mode 100644 index 0000000000..3c37351dcc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/fpu/Implies new file mode 100644 index 0000000000..ae0dbaf857 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/fpu/multiarch/Implies new file mode 100644 index 0000000000..f11e1bdba2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/multiarch/Implies new file mode 100644 index 0000000000..dd6bca4b36 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/Implies new file mode 100644 index 0000000000..efe5d4193c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power9 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/fpu/Implies new file mode 100644 index 0000000000..3633114b47 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power9/fpu/ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/fpu/multiarch/Implies new file mode 100644 index 0000000000..105948092d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power9/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/multiarch/Implies new file mode 100644 index 0000000000..02be30cfd0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power9/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/preconfigure b/REORG.TODO/sysdeps/powerpc/preconfigure new file mode 100644 index 0000000000..7de2eafd52 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/preconfigure @@ -0,0 +1,20 @@ +# preconfigure fragment for powerpc. + +case "$machine" in +powerpc64le) + base_machine=powerpc machine=powerpc/powerpc64le + ;; +powerpc64*) + base_machine=powerpc machine=powerpc/powerpc64 + ;; +powerpc*) + # Check for e500. + $CC $CFLAGS $CPPFLAGS -E -dM -xc /dev/null > conftest.i + if grep -q __NO_FPRS__ conftest.i && ! grep -q _SOFT_FLOAT conftest.i; then + base_machine=powerpc machine=powerpc/powerpc32/e500 + else + base_machine=powerpc machine=powerpc/powerpc32 + fi + rm -f conftest.i + ;; +esac diff --git a/REORG.TODO/sysdeps/powerpc/rtld-global-offsets.sym b/REORG.TODO/sysdeps/powerpc/rtld-global-offsets.sym new file mode 100644 index 0000000000..f5ea5a1466 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/rtld-global-offsets.sym @@ -0,0 +1,8 @@ +#define SHARED 1 + +#include <ldsodefs.h> + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_HWCAP_OFFSET rtld_global_ro_offsetof (_dl_hwcap) +RTLD_GLOBAL_RO_DL_HWCAP2_OFFSET rtld_global_ro_offsetof (_dl_hwcap2) diff --git a/REORG.TODO/sysdeps/powerpc/sched_cpucount.c b/REORG.TODO/sysdeps/powerpc/sched_cpucount.c new file mode 100644 index 0000000000..13d17ac555 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/sched_cpucount.c @@ -0,0 +1,22 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef _ARCH_PWR5 +# define POPCNT(l) __builtin_popcountl (l) +#endif + +#include <posix/sched_cpucount.c> diff --git a/REORG.TODO/sysdeps/powerpc/sigjmp.c b/REORG.TODO/sysdeps/powerpc/sigjmp.c new file mode 100644 index 0000000000..6d593a0992 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/sigjmp.c @@ -0,0 +1,39 @@ +/* Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Versioned copy of sysdeps/generic/sigjmp.c modified for AltiVec support. */ + +#include <shlib-compat.h> +#include <stddef.h> +#include <setjmp.h> +#include <signal.h> + +/* This function is called by the `sigsetjmp' macro + before doing a `__setjmp' on ENV[0].__jmpbuf. + Always return zero. */ + +int +__vmx__sigjmp_save (sigjmp_buf env, int savemask) +{ + env[0].__mask_was_saved = (savemask && + __sigprocmask (SIG_BLOCK, (sigset_t *) NULL, + &env[0].__saved_mask) == 0); + + return 0; +} + +strong_alias (__vmx__sigjmp_save,__sigjmp_save) diff --git a/REORG.TODO/sysdeps/powerpc/soft-fp/sfp-machine.h b/REORG.TODO/sysdeps/powerpc/soft-fp/sfp-machine.h new file mode 100644 index 0000000000..d92a90e3e2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/soft-fp/sfp-machine.h @@ -0,0 +1,114 @@ +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 +#define _FP_QNANNEGATEDP 0 + +/* Someone please check this. */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define _FP_TININESS_AFTER_ROUNDING 0 + +#if defined __NO_FPRS__ && !defined _SOFT_FLOAT + +/* Exception flags. We use the bit positions of the appropriate bits + in the FPEFSCR. */ + +# include <fenv_libc.h> +# include <sysdep.h> +# include <sys/prctl.h> + +int __feraiseexcept_soft (int); +libc_hidden_proto (__feraiseexcept_soft) + +# define FP_EX_INEXACT SPEFSCR_FINXS +# define FP_EX_INVALID SPEFSCR_FINVS +# define FP_EX_DIVZERO SPEFSCR_FDBZS +# define FP_EX_UNDERFLOW SPEFSCR_FUNFS +# define FP_EX_OVERFLOW SPEFSCR_FOVFS + +# define _FP_DECL_EX \ + int _spefscr __attribute__ ((unused)), _ftrapex __attribute__ ((unused)) = 0 +# define FP_INIT_ROUNDMODE \ + do \ + { \ + int _r; \ + INTERNAL_SYSCALL_DECL (_err); \ + \ + _spefscr = fegetenv_register (); \ + _r = INTERNAL_SYSCALL (prctl, _err, 2, PR_GET_FPEXC, &_ftrapex); \ + if (INTERNAL_SYSCALL_ERROR_P (_r, _err)) \ + _ftrapex = 0; \ + } \ + while (0) +# define FP_INIT_EXCEPTIONS /* Empty. */ + +# define FP_HANDLE_EXCEPTIONS __feraiseexcept_soft (_fex) +# define FP_ROUNDMODE (_spefscr & 0x3) + +/* Not correct in general, but sufficient for the uses in soft-fp. */ +# define FP_TRAPPING_EXCEPTIONS (_ftrapex & PR_FP_EXC_UND \ + ? FP_EX_UNDERFLOW \ + : 0) + +#else + +/* Exception flags. We use the bit positions of the appropriate bits + in the FPSCR, which also correspond to the FE_* bits. This makes + everything easier ;-). */ +# define FP_EX_INVALID (1 << (31 - 2)) +# define FP_EX_OVERFLOW (1 << (31 - 3)) +# define FP_EX_UNDERFLOW (1 << (31 - 4)) +# define FP_EX_DIVZERO (1 << (31 - 5)) +# define FP_EX_INEXACT (1 << (31 - 6)) + +# define FP_HANDLE_EXCEPTIONS __simulate_exceptions (_fex) +# define FP_ROUNDMODE __sim_round_mode_thread +# define FP_TRAPPING_EXCEPTIONS \ + (~__sim_disabled_exceptions_thread & 0x3e000000) + +#endif + +extern __thread int __sim_exceptions_thread attribute_tls_model_ie; +libc_hidden_tls_proto (__sim_exceptions_thread, tls_model ("initial-exec")); +extern __thread int __sim_disabled_exceptions_thread attribute_tls_model_ie; +libc_hidden_tls_proto (__sim_disabled_exceptions_thread, + tls_model ("initial-exec")); +extern __thread int __sim_round_mode_thread attribute_tls_model_ie; +libc_hidden_tls_proto (__sim_round_mode_thread, tls_model ("initial-exec")); + +extern void __simulate_exceptions (int x) attribute_hidden; diff --git a/REORG.TODO/sysdeps/powerpc/sotruss-lib.c b/REORG.TODO/sysdeps/powerpc/sotruss-lib.c new file mode 100644 index 0000000000..e5274241a5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/sotruss-lib.c @@ -0,0 +1,69 @@ +/* PowerPC specific sotruss-lib functions. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define HAVE_ARCH_PLTENTER +#define HAVE_ARCH_PLTEXIT + +#include <elf/sotruss-lib.c> + +#ifdef __powerpc64__ +# if _CALL_ELF != 2 +# define LA_PPC_REGS La_ppc64_regs +# define LA_PPC_RETVAL La_ppc64_retval +# define LA_PPC_GNU_PLTENTER la_ppc64_gnu_pltenter +# define LA_PPC_GNU_PLTEXIT la_ppc64_gnu_pltexit +# else +# define LA_PPC_REGS La_ppc64v2_regs +# define LA_PPC_RETVAL La_ppc64v2_retval +# define LA_PPC_GNU_PLTENTER la_ppc64v2_gnu_pltenter +# define LA_PPC_GNU_PLTEXIT la_ppc64v2_gnu_pltexit +# endif +# else +# define LA_PPC_REGS La_ppc32_regs +# define LA_PPC_RETVAL La_ppc32_retval +# define LA_PPC_GNU_PLTENTER la_ppc32_gnu_pltenter +# define LA_PPC_GNU_PLTEXIT la_ppc32_gnu_pltexit +#endif + +ElfW(Addr) +LA_PPC_GNU_PLTENTER (ElfW(Sym) *sym __attribute__ ((unused)), + unsigned int ndx __attribute__ ((unused)), + uintptr_t *refcook, uintptr_t *defcook, + LA_PPC_REGS *regs, unsigned int *flags, + const char *symname, long int *framesizep) +{ + print_enter (refcook, defcook, symname, + regs->lr_reg[0], regs->lr_reg[1], regs->lr_reg[2], *flags); + + /* No need to copy anything, we will not need the parameters in any case. */ + *framesizep = 0; + + return sym->st_value; +} + +unsigned int +LA_PPC_GNU_PLTEXIT (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, + const struct LA_PPC_REGS *inregs, + struct LA_PPC_RETVAL *outregs, const char *symname) +{ + print_exit (refcook, defcook, symname, outregs->lrv_r3); + + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/stackinfo.h b/REORG.TODO/sysdeps/powerpc/stackinfo.h new file mode 100644 index 0000000000..964ab77d56 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/stackinfo.h @@ -0,0 +1,38 @@ +/* Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains a bit of information about the stack allocation + of the processor. */ + +#ifndef _STACKINFO_H +#define _STACKINFO_H 1 + +#include <elf.h> + +/* On PPC the stack grows down. */ +#define _STACK_GROWS_DOWN 1 + +#if __WORDSIZE == 64 +/* PPC64 doesn't need an executable stack and doesn't need PT_GNU_STACK + * to make the stack nonexecutable. */ +# define DEFAULT_STACK_PERMS (PF_R|PF_W) +#else +/* PF_X can be overridden if PT_GNU_STACK is present but is presumed absent. */ +# define DEFAULT_STACK_PERMS (PF_R|PF_W|PF_X) +#endif + +#endif /* stackinfo.h */ diff --git a/REORG.TODO/sysdeps/powerpc/sys/platform/ppc.h b/REORG.TODO/sysdeps/powerpc/sys/platform/ppc.h new file mode 100644 index 0000000000..4a0c015766 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/sys/platform/ppc.h @@ -0,0 +1,146 @@ +/* Facilities specific to the PowerPC architecture + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _SYS_PLATFORM_PPC_H +#define _SYS_PLATFORM_PPC_H 1 + +#include <features.h> +#include <stdint.h> +#include <bits/ppc.h> + +/* Read the Time Base Register. */ +static __inline__ uint64_t +__ppc_get_timebase (void) +{ +#if __GNUC_PREREQ (4, 8) + return __builtin_ppc_get_timebase (); +#else +# ifdef __powerpc64__ + uint64_t __tb; + /* "volatile" is necessary here, because the user expects this assembly + isn't moved after an optimization. */ + __asm__ volatile ("mfspr %0, 268" : "=r" (__tb)); + return __tb; +# else /* not __powerpc64__ */ + uint32_t __tbu, __tbl, __tmp; \ + __asm__ volatile ("0:\n\t" + "mftbu %0\n\t" + "mftbl %1\n\t" + "mftbu %2\n\t" + "cmpw %0, %2\n\t" + "bne- 0b" + : "=r" (__tbu), "=r" (__tbl), "=r" (__tmp)); + return (((uint64_t) __tbu << 32) | __tbl); +# endif /* not __powerpc64__ */ +#endif +} + +/* The following functions provide hints about the usage of shared processor + resources, as defined in ISA 2.06 and newer. */ + +/* Provides a hint that performance will probably be improved if shared + resources dedicated to the executing processor are released for use by other + processors. */ +static __inline__ void +__ppc_yield (void) +{ + __asm__ volatile ("or 27,27,27"); +} + +/* Provides a hint that performance will probably be improved if shared + resources dedicated to the executing processor are released until + all outstanding storage accesses to caching-inhibited storage have been + completed. */ +static __inline__ void +__ppc_mdoio (void) +{ + __asm__ volatile ("or 29,29,29"); +} + +/* Provides a hint that performance will probably be improved if shared + resources dedicated to the executing processor are released until all + outstanding storage accesses to cacheable storage for which the data is not + in the cache have been completed. */ +static __inline__ void +__ppc_mdoom (void) +{ + __asm__ volatile ("or 30,30,30"); +} + + +/* ISA 2.05 and beyond support the Program Priority Register (PPR) to adjust + thread priorities based on lock acquisition, wait and release. The ISA + defines the use of form 'or Rx,Rx,Rx' as the way to modify the PRI field. + The unprivileged priorities are: + Rx = 1 (low) + Rx = 2 (medium) + Rx = 6 (medium-low/normal) + The 'or' instruction form is a nop in previous hardware, so it is safe to + use unguarded. The default value is 'medium'. + */ + +static __inline__ void +__ppc_set_ppr_med (void) +{ + __asm__ volatile ("or 2,2,2"); +} + +static __inline__ void +__ppc_set_ppr_med_low (void) +{ + __asm__ volatile ("or 6,6,6"); +} + +static __inline__ void +__ppc_set_ppr_low (void) +{ + __asm__ volatile ("or 1,1,1"); +} + +/* Power ISA 2.07 (Book II, Chapter 3) extends the priorities that can be set + to the Program Priority Register (PPR). The form 'or Rx,Rx,Rx' is used to + modify the PRI field of the PPR, the same way as described above. + The new priority levels are: + Rx = 31 (very low) + Rx = 5 (medium high) + Any program can set the priority to very low, low, medium low, and medium, + as these are unprivileged. + The medium high priority, on the other hand, is privileged, and may only be + set during certain time intervals by problem-state programs. If the program + priority is medium high when the time interval expires or if an attempt is + made to set the priority to medium high when it is not allowed, the PRI + field is set to medium. + */ + +#ifdef _ARCH_PWR8 + +static __inline__ void +__ppc_set_ppr_very_low (void) +{ + __asm__ volatile ("or 31,31,31"); +} + +static __inline__ void +__ppc_set_ppr_med_high (void) +{ + __asm__ volatile ("or 5,5,5"); +} + +#endif + +#endif /* sys/platform/ppc.h */ diff --git a/REORG.TODO/sysdeps/powerpc/sysdep.h b/REORG.TODO/sysdeps/powerpc/sysdep.h new file mode 100644 index 0000000000..f07b959eee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/sysdep.h @@ -0,0 +1,189 @@ +/* Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + * Powerpc Feature masks for the Aux Vector Hardware Capabilities (AT_HWCAP). + * This entry is copied to _dl_hwcap or rtld_global._dl_hwcap during startup. + */ +#define _SYSDEPS_SYSDEP_H 1 +#include <bits/hwcap.h> +#ifdef ENABLE_LOCK_ELISION +#include <tls.h> +#include <htm.h> +#endif + +#define PPC_FEATURE_970 (PPC_FEATURE_POWER4 + PPC_FEATURE_HAS_ALTIVEC) + +#ifdef __ASSEMBLER__ + +/* Symbolic names for the registers. The only portable way to write asm + code is to use number but this produces really unreadable code. + Therefore these symbolic names. */ + +/* Integer registers. */ +#define r0 0 +#define r1 1 +#define r2 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + +/* Floating-point registers. */ +#define fp0 0 +#define fp1 1 +#define fp2 2 +#define fp3 3 +#define fp4 4 +#define fp5 5 +#define fp6 6 +#define fp7 7 +#define fp8 8 +#define fp9 9 +#define fp10 10 +#define fp11 11 +#define fp12 12 +#define fp13 13 +#define fp14 14 +#define fp15 15 +#define fp16 16 +#define fp17 17 +#define fp18 18 +#define fp19 19 +#define fp20 20 +#define fp21 21 +#define fp22 22 +#define fp23 23 +#define fp24 24 +#define fp25 25 +#define fp26 26 +#define fp27 27 +#define fp28 28 +#define fp29 29 +#define fp30 30 +#define fp31 31 + +/* Condition code registers. */ +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + +/* Vector registers. */ +#define v0 0 +#define v1 1 +#define v2 2 +#define v3 3 +#define v4 4 +#define v5 5 +#define v6 6 +#define v7 7 +#define v8 8 +#define v9 9 +#define v10 10 +#define v11 11 +#define v12 12 +#define v13 13 +#define v14 14 +#define v15 15 +#define v16 16 +#define v17 17 +#define v18 18 +#define v19 19 +#define v20 20 +#define v21 21 +#define v22 22 +#define v23 23 +#define v24 24 +#define v25 25 +#define v26 26 +#define v27 27 +#define v28 28 +#define v29 29 +#define v30 30 +#define v31 31 + +#define VRSAVE 256 + +/* The 32-bit words of a 64-bit dword are at these offsets in memory. */ +#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN +# define LOWORD 0 +# define HIWORD 4 +#else +# define LOWORD 4 +# define HIWORD 0 +#endif + +/* The high 16-bit word of a 64-bit dword is at this offset in memory. */ +#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN +# define HISHORT 6 +#else +# define HISHORT 0 +#endif + +/* This seems to always be the case on PPC. */ +#define ALIGNARG(log2) log2 +#define ASM_SIZE_DIRECTIVE(name) .size name,.-name + +#else + +/* Linux kernel powerpc documentation [1] states issuing a syscall inside a + transaction is not recommended and may lead to undefined behavior. It + also states syscalls do not abort transactions. To avoid such traps, + we abort transaction just before syscalls. + + [1] Documentation/powerpc/transactional_memory.txt [Syscalls] */ +#if !IS_IN(rtld) && defined (ENABLE_LOCK_ELISION) +# define ABORT_TRANSACTION \ + ({ \ + if (THREAD_GET_TM_CAPABLE ()) \ + __libc_tabort (_ABORT_SYSCALL); \ + }) +#else +# define ABORT_TRANSACTION +#endif + +#endif /* __ASSEMBLER__ */ diff --git a/REORG.TODO/sysdeps/powerpc/test-arith.c b/REORG.TODO/sysdeps/powerpc/test-arith.c new file mode 100644 index 0000000000..aa1568d4d7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/test-arith.c @@ -0,0 +1,604 @@ +/* Test floating-point arithmetic operations. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fenv.h> +#include <assert.h> + +#ifndef ESIZE +typedef double tocheck_t; +#define ESIZE 11 +#define MSIZE 52 +#define FUNC(x) x +#endif + +#define R_NEAREST 1 +#define R_ZERO 2 +#define R_UP 4 +#define R_DOWN 8 +#define R_ALL (R_NEAREST|R_ZERO|R_UP|R_DOWN) +static fenv_t rmodes[4]; +static const char * const rmnames[4] = +{ "nearest","zero","+Inf","-Inf" }; + +typedef union { + tocheck_t tc; + unsigned char c[sizeof(tocheck_t)]; +} union_t; + +/* Don't try reading these in a font that doesn't distinguish + O and zero. */ +typedef enum { + P_Z = 0x0, /* 00000...0 */ + P_000O = 0x1, /* 00011...1 */ + P_001Z = 0x2, /* 00100...0 */ + P_00O = 0x3, /* 00111...1 */ + P_01Z = 0x4, /* 01000...0 */ + P_010O = 0x5, /* 01011...1 */ + P_011Z = 0x6, /* 01100...0 */ + P_0O = 0x7, /* 01111...1 */ + P_1Z = 0x8, /* 10000...0 */ + P_100O = 0x9, /* 10011...1 */ + P_101Z = 0xa, /* 10100...0 */ + P_10O = 0xb, /* 10111...1 */ + P_11Z = 0xc, /* 11000...0 */ + P_110O = 0xd, /* 11011...1 */ + P_111Z = 0xe, /* 11100...0 */ + P_O = 0xf, /* 11111...1 */ + P_Z1 = 0x11, /* 000...001 */ + P_Z10 = 0x12, /* 000...010 */ + P_Z11 = 0x13, /* 000...011 */ + P_0O00 = 0x14, /* 011...100 */ + P_0O01 = 0x15, /* 011...101 */ + P_0O0 = 0x16, /* 011...110 */ + P_1Z1 = 0x19, /* 100...001 */ + P_1Z10 = 0x1a, /* 100...010 */ + P_1Z11 = 0x1b, /* 100...011 */ + P_O00 = 0x1c, /* 111...100 */ + P_O01 = 0x1d, /* 111...101 */ + P_O0 = 0x1e, /* 111...110 */ + P_R = 0x20, /* rrr...rrr */ /* ('r' means random. ) */ + P_Ro = 0x21, /* rrr...rrr, with odd parity. */ + P_0R = 0x22, /* 0rr...rrr */ + P_1R = 0x23, /* 1rr...rrr */ + P_Rno = 0x24, /* rrr...rrr, but not all ones. */ +} pattern_t; + +static void +pattern_fill(pattern_t ptn, unsigned char *start, int bitoffset, int count) +{ +#define bitset(count, value) \ + start[(count)/8] = (start[(count)/8] & ~(1 << 7-(count)%8) \ + | (value) << 7-(count)%8) + int i; + + if (ptn >= 0 && ptn <= 0xf) + { + /* Patterns between 0 and 0xF have the following format: + The LSBit is used to fill the last n-3 bits of the pattern; + The next 3 bits are the first 3 bits of the pattern. */ + for (i = 0; i < count; i++) + if (i < 3) + bitset((bitoffset+i), ptn >> (3-i) & 1); + else + bitset((bitoffset+i), ptn >> 0 & 1); + } + else if (ptn <= 0x1f) + { + /* Patterns between 0x10 and 0x1F have the following format: + The two LSBits are the last two bits of the pattern; + The 0x8 bit is the first bit of the pattern; + The 0x4 bit is used to fill the remainder. */ + for (i = 0; i < count; i++) + if (i == 0) + bitset((bitoffset+i), ptn >> 3 & 1); + else if (i >= count-2) + bitset((bitoffset+i), ptn >> (count-1-i) & 1); + else + bitset((bitoffset+i), ptn >> 2 & 1); + } + else switch (ptn) + { + case P_0R: case P_1R: + assert(count > 0); + bitset(bitoffset, ptn & 1); + count--; + bitoffset++; + case P_R: + for (; count > 0; count--, bitoffset++) + bitset(bitoffset, rand() & 1); + break; + case P_Ro: + { + int op = 1; + assert(count > 0); + for (; count > 1; count--, bitoffset++) + bitset(bitoffset, op ^= (rand() & 1)); + bitset(bitoffset, op); + break; + } + case P_Rno: + { + int op = 1; + assert(count > 0); + for (; count > 1; count--, bitoffset++) + { + int r = rand() & 1; + op &= r; + bitset(bitoffset, r); + } + bitset(bitoffset, rand() & (op ^ 1)); + break; + } + + default: + assert(0); + } +#undef bitset +} + +static tocheck_t +pattern(int negative, pattern_t exp, pattern_t mant) +{ + union_t result; +#if 0 + int i; +#endif + + pattern_fill(negative ? P_O : P_Z, result.c, 0, 1); + pattern_fill(exp, result.c, 1, ESIZE); + pattern_fill(mant, result.c, ESIZE+1, MSIZE); +#if 0 + printf("neg=%d exp=%02x mant=%02x: ", negative, exp, mant); + for (i = 0; i < sizeof(tocheck_t); i++) + printf("%02x", result.c[i]); + printf("\n"); +#endif + return result.tc; +} + +/* Return the closest different tocheck_t to 'x' in the direction of + 'direction', or 'x' if there is no such value. Assumes 'x' is not + a NaN. */ +static tocheck_t +delta(tocheck_t x, int direction) +{ + union_t xx; + int i; + + xx.tc = x; + if (xx.c[0] & 0x80) + direction = -direction; + if (direction == +1) + { + union_t tx; + tx.tc = pattern(xx.c[0] >> 7, P_O, P_Z); + if (memcmp(tx.c, xx.c, sizeof(tocheck_t)) == 0) + return x; + } + for (i = sizeof(tocheck_t)-1; i > 0; i--) + { + xx.c[i] += direction; + if (xx.c[i] != (direction > 0 ? 0 : 0xff)) + return xx.tc; + } + if (direction < 0 && (xx.c[0] & 0x7f) == 0) + return pattern(~(xx.c[0] >> 7) & 1, P_Z, P_Z1); + else + { + xx.c[0] += direction; + return xx.tc; + } +} + +static int nerrors = 0; + +#ifdef FE_ALL_INVALID +static const int all_exceptions = FE_ALL_INVALID | FE_ALL_EXCEPT; +#else +static const int all_exceptions = FE_ALL_EXCEPT; +#endif + +static void +check_result(int line, const char *rm, tocheck_t expected, tocheck_t actual) +{ + if (memcmp(&expected, &actual, sizeof(tocheck_t)) != 0) + { + unsigned char *ex, *ac; + size_t i; + + printf("%s:%d:round %s:result failed\n" + " expected result 0x", __FILE__, line, rm); + ex = (unsigned char *)&expected; + ac = (unsigned char *)&actual; + for (i = 0; i < sizeof(tocheck_t); i++) + printf("%02x", ex[i]); + printf(" got 0x"); + for (i = 0; i < sizeof(tocheck_t); i++) + printf("%02x", ac[i]); + printf("\n"); + nerrors++; + } +} + +static const struct { + int except; + const char *name; +} excepts[] = { +#define except_entry(ex) { ex, #ex } , +#ifdef FE_INEXACT + except_entry(FE_INEXACT) +#else +# define FE_INEXACT 0 +#endif +#ifdef FE_DIVBYZERO + except_entry(FE_DIVBYZERO) +#else +# define FE_DIVBYZERO 0 +#endif +#ifdef FE_UNDERFLOW + except_entry(FE_UNDERFLOW) +#else +# define FE_UNDERFLOW 0 +#endif +#ifdef FE_OVERFLOW + except_entry(FE_OVERFLOW) +#else +# define FE_OVERFLOW 0 +#endif +#ifdef FE_INVALID + except_entry(FE_INVALID) +#else +# define FE_INVALID 0 +#endif +#ifdef FE_INVALID_SNAN + except_entry(FE_INVALID_SNAN) +#else +# define FE_INVALID_SNAN FE_INVALID +#endif +#ifdef FE_INVALID_ISI + except_entry(FE_INVALID_ISI) +#else +# define FE_INVALID_ISI FE_INVALID +#endif +#ifdef FE_INVALID_IDI + except_entry(FE_INVALID_IDI) +#else +# define FE_INVALID_IDI FE_INVALID +#endif +#ifdef FE_INVALID_ZDZ + except_entry(FE_INVALID_ZDZ) +#else +# define FE_INVALID_ZDZ FE_INVALID +#endif +#ifdef FE_INVALID_COMPARE + except_entry(FE_INVALID_COMPARE) +#else +# define FE_INVALID_COMPARE FE_INVALID +#endif +#ifdef FE_INVALID_SOFTWARE + except_entry(FE_INVALID_SOFTWARE) +#else +# define FE_INVALID_SOFTWARE FE_INVALID +#endif +#ifdef FE_INVALID_SQRT + except_entry(FE_INVALID_SQRT) +#else +# define FE_INVALID_SQRT FE_INVALID +#endif +#ifdef FE_INVALID_INTEGER_CONVERSION + except_entry(FE_INVALID_INTEGER_CONVERSION) +#else +# define FE_INVALID_INTEGER_CONVERSION FE_INVALID +#endif +}; + +static int excepts_missing = 0; + +static void +check_excepts(int line, const char *rm, int expected, int actual) +{ + if (expected & excepts_missing) + expected = expected & ~excepts_missing | FE_INVALID_SNAN; + if ((expected & all_exceptions) != actual) + { + size_t i; + printf("%s:%d:round %s:exceptions failed\n" + " expected exceptions ", __FILE__, line,rm); + for (i = 0; i < sizeof(excepts)/sizeof(excepts[0]); i++) + if (expected & excepts[i].except) + printf("%s ",excepts[i].name); + if ((expected & all_exceptions) == 0) + printf("- "); + printf("got"); + for (i = 0; i < sizeof(excepts)/sizeof(excepts[0]); i++) + if (actual & excepts[i].except) + printf(" %s",excepts[i].name); + if ((actual & all_exceptions) == 0) + printf("- "); + printf(".\n"); + nerrors++; + } +} + +typedef enum { + B_ADD, B_SUB, B_MUL, B_DIV, B_NEG, B_ABS, B_SQRT +} op_t; +typedef struct { + int line; + op_t op; + int a_sgn; + pattern_t a_exp, a_mant; + int b_sgn; + pattern_t b_exp, b_mant; + int rmode; + int excepts; + int x_sgn; + pattern_t x_exp, x_mant; +} optest_t; +static const optest_t optests[] = { + /* Additions of zero. */ + {__LINE__,B_ADD, 0,P_Z,P_Z, 0,P_Z,P_Z, R_ALL,0, 0,P_Z,P_Z }, + {__LINE__,B_ADD, 1,P_Z,P_Z, 0,P_Z,P_Z, R_ALL & ~R_DOWN,0, 0,P_Z,P_Z }, + {__LINE__,B_ADD, 1,P_Z,P_Z, 0,P_Z,P_Z, R_DOWN,0, 1,P_Z,P_Z }, + {__LINE__,B_ADD, 1,P_Z,P_Z, 1,P_Z,P_Z, R_ALL,0, 1,P_Z,P_Z }, + + /* Additions with NaN. */ + {__LINE__,B_ADD, 0,P_O,P_101Z, 0,P_Z,P_Z, R_ALL,0, 0,P_O,P_101Z }, + {__LINE__,B_ADD, 0,P_O,P_01Z, 0,P_Z,P_Z, R_ALL, + FE_INVALID | FE_INVALID_SNAN, 0,P_O,P_11Z }, + {__LINE__,B_ADD, 0,P_O,P_Z, 0,P_O,P_0O, R_ALL, + FE_INVALID | FE_INVALID_SNAN, 0,P_O,P_O }, + {__LINE__,B_ADD, 0,P_Z,P_Z, 0,P_O,P_11Z, R_ALL,0, 0,P_O,P_11Z }, + {__LINE__,B_ADD, 0,P_O,P_001Z, 0,P_O,P_001Z, R_ALL, + FE_INVALID | FE_INVALID_SNAN, 0,P_O,P_101Z }, + {__LINE__,B_ADD, 0,P_O,P_1Z, 0,P_Z,P_Z, R_ALL,0, 0,P_O,P_1Z }, + {__LINE__,B_ADD, 0,P_0O,P_Z, 0,P_O,P_10O, R_ALL,0, 0,P_O,P_10O }, + + /* Additions with infinity. */ + {__LINE__,B_ADD, 0,P_O,P_Z, 0,P_Z,P_Z, R_ALL,0, 0,P_O,P_Z }, + {__LINE__,B_ADD, 0,P_O,P_Z, 1,P_Z,P_Z, R_ALL,0, 0,P_O,P_Z }, + {__LINE__,B_ADD, 1,P_O,P_Z, 0,P_Z,P_Z, R_ALL,0, 1,P_O,P_Z }, + {__LINE__,B_ADD, 1,P_O,P_Z, 1,P_Z,P_Z, R_ALL,0, 1,P_O,P_Z }, + {__LINE__,B_ADD, 0,P_O,P_Z, 0,P_O,P_Z, R_ALL,0, 0,P_O,P_Z }, + {__LINE__,B_ADD, 1,P_O,P_Z, 1,P_O,P_Z, R_ALL,0, 1,P_O,P_Z }, + {__LINE__,B_ADD, 0,P_O,P_Z, 1,P_O,P_Z, R_ALL, + FE_INVALID | FE_INVALID_ISI, 0,P_O,P_1Z }, + {__LINE__,B_ADD, 1,P_O,P_Z, 0,P_O,P_Z, R_ALL, + FE_INVALID | FE_INVALID_ISI, 0,P_O,P_1Z }, + {__LINE__,B_ADD, 0,P_O,P_Z, 0,P_0O,P_Z, R_ALL,0, 0,P_O,P_Z }, + {__LINE__,B_ADD, 1,P_O,P_Z, 0,P_0O,P_Z, R_ALL,0, 1,P_O,P_Z }, + {__LINE__,B_ADD, 0,P_O,P_Z, 1,P_0O,P_Z, R_ALL,0, 0,P_O,P_Z }, + {__LINE__,B_ADD, 1,P_O,P_Z, 1,P_0O,P_Z, R_ALL,0, 1,P_O,P_Z }, + + /* Overflow (and zero). */ + {__LINE__,B_ADD, 0,P_O0,P_Z, 0,P_O0,P_Z, R_NEAREST | R_UP, + FE_INEXACT | FE_OVERFLOW, 0,P_O,P_Z }, + {__LINE__,B_ADD, 0,P_O0,P_Z, 0,P_O0,P_Z, R_ZERO | R_DOWN, + FE_INEXACT | FE_OVERFLOW, 0,P_O0,P_O }, + {__LINE__,B_ADD, 1,P_O0,P_Z, 1,P_O0,P_Z, R_NEAREST | R_DOWN, + FE_INEXACT | FE_OVERFLOW, 1,P_O,P_Z }, + {__LINE__,B_ADD, 1,P_O0,P_Z, 1,P_O0,P_Z, R_ZERO | R_UP, + FE_INEXACT | FE_OVERFLOW, 1,P_O0,P_O }, + {__LINE__,B_ADD, 0,P_O0,P_Z, 1,P_O0,P_Z, R_ALL & ~R_DOWN, + 0, 0,P_Z,P_Z }, + {__LINE__,B_ADD, 0,P_O0,P_Z, 1,P_O0,P_Z, R_DOWN, + 0, 1,P_Z,P_Z }, + + /* Negation. */ + {__LINE__,B_NEG, 0,P_Z,P_Z, 0,0,0, R_ALL, 0, 1,P_Z,P_Z }, + {__LINE__,B_NEG, 1,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z }, + {__LINE__,B_NEG, 0,P_O,P_Z, 0,0,0, R_ALL, 0, 1,P_O,P_Z }, + {__LINE__,B_NEG, 1,P_O,P_Z, 0,0,0, R_ALL, 0, 0,P_O,P_Z }, + {__LINE__,B_NEG, 0,P_O,P_1Z, 0,0,0, R_ALL, 0, 1,P_O,P_1Z }, + {__LINE__,B_NEG, 1,P_O,P_1Z, 0,0,0, R_ALL, 0, 0,P_O,P_1Z }, + {__LINE__,B_NEG, 0,P_O,P_01Z, 0,0,0, R_ALL, 0, 1,P_O,P_01Z }, + {__LINE__,B_NEG, 1,P_O,P_01Z, 0,0,0, R_ALL, 0, 0,P_O,P_01Z }, + {__LINE__,B_NEG, 0,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 1,P_1Z,P_1Z1 }, + {__LINE__,B_NEG, 1,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 0,P_1Z,P_1Z1 }, + {__LINE__,B_NEG, 0,P_Z,P_Z1, 0,0,0, R_ALL, 0, 1,P_Z,P_Z1 }, + {__LINE__,B_NEG, 1,P_Z,P_Z1, 0,0,0, R_ALL, 0, 0,P_Z,P_Z1 }, + + /* Absolute value. */ + {__LINE__,B_ABS, 0,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z }, + {__LINE__,B_ABS, 1,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z }, + {__LINE__,B_ABS, 0,P_O,P_Z, 0,0,0, R_ALL, 0, 0,P_O,P_Z }, + {__LINE__,B_ABS, 1,P_O,P_Z, 0,0,0, R_ALL, 0, 0,P_O,P_Z }, + {__LINE__,B_ABS, 0,P_O,P_1Z, 0,0,0, R_ALL, 0, 0,P_O,P_1Z }, + {__LINE__,B_ABS, 1,P_O,P_1Z, 0,0,0, R_ALL, 0, 0,P_O,P_1Z }, + {__LINE__,B_ABS, 0,P_O,P_01Z, 0,0,0, R_ALL, 0, 0,P_O,P_01Z }, + {__LINE__,B_ABS, 1,P_O,P_01Z, 0,0,0, R_ALL, 0, 0,P_O,P_01Z }, + {__LINE__,B_ABS, 0,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 0,P_1Z,P_1Z1 }, + {__LINE__,B_ABS, 1,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 0,P_1Z,P_1Z1 }, + {__LINE__,B_ABS, 0,P_Z,P_Z1, 0,0,0, R_ALL, 0, 0,P_Z,P_Z1 }, + {__LINE__,B_ABS, 1,P_Z,P_Z1, 0,0,0, R_ALL, 0, 0,P_Z,P_Z1 }, + + /* Square root. */ + {__LINE__,B_SQRT, 0,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z }, + {__LINE__,B_SQRT, 1,P_Z,P_Z, 0,0,0, R_ALL, 0, 1,P_Z,P_Z }, + {__LINE__,B_SQRT, 0,P_O,P_1Z, 0,0,0, R_ALL, 0, 0,P_O,P_1Z }, + {__LINE__,B_SQRT, 1,P_O,P_1Z, 0,0,0, R_ALL, 0, 1,P_O,P_1Z }, + {__LINE__,B_SQRT, 0,P_O,P_01Z, 0,0,0, R_ALL, + FE_INVALID | FE_INVALID_SNAN, 0,P_O,P_11Z }, + {__LINE__,B_SQRT, 1,P_O,P_01Z, 0,0,0, R_ALL, + FE_INVALID | FE_INVALID_SNAN, 1,P_O,P_11Z }, + + {__LINE__,B_SQRT, 0,P_O,P_Z, 0,0,0, R_ALL, 0, 0,P_O,P_Z }, + {__LINE__,B_SQRT, 0,P_0O,P_Z, 0,0,0, R_ALL, 0, 0,P_0O,P_Z }, + + {__LINE__,B_SQRT, 1,P_O,P_Z, 0,0,0, R_ALL, + FE_INVALID | FE_INVALID_SQRT, 0,P_O,P_1Z }, + {__LINE__,B_SQRT, 1,P_1Z,P_1Z1, 0,0,0, R_ALL, + FE_INVALID | FE_INVALID_SQRT, 0,P_O,P_1Z }, + {__LINE__,B_SQRT, 1,P_Z,P_Z1, 0,0,0, R_ALL, + FE_INVALID | FE_INVALID_SQRT, 0,P_O,P_1Z }, + +}; + +static void +check_op(void) +{ + size_t i; + int j; + tocheck_t r, a, b, x; + int raised; + + for (i = 0; i < sizeof(optests)/sizeof(optests[0]); i++) + { + a = pattern(optests[i].a_sgn, optests[i].a_exp, + optests[i].a_mant); + b = pattern(optests[i].b_sgn, optests[i].b_exp, + optests[i].b_mant); + x = pattern(optests[i].x_sgn, optests[i].x_exp, + optests[i].x_mant); + for (j = 0; j < 4; j++) + if (optests[i].rmode & 1<<j) + { + fesetenv(rmodes+j); + switch (optests[i].op) + { + case B_ADD: r = a + b; break; + case B_SUB: r = a - b; break; + case B_MUL: r = a * b; break; + case B_DIV: r = a / b; break; + case B_NEG: r = -a; break; + case B_ABS: r = FUNC(fabs)(a); break; + case B_SQRT: r = FUNC(sqrt)(a); break; + } + raised = fetestexcept(all_exceptions); + check_result(optests[i].line,rmnames[j],x,r); + check_excepts(optests[i].line,rmnames[j], + optests[i].excepts,raised); + } + } +} + +static void +fail_xr(int line, const char *rm, tocheck_t x, tocheck_t r, tocheck_t xx, + int xflag) +{ + size_t i; + unsigned char *cx, *cr, *cxx; + + printf("%s:%d:round %s:fail\n with x=0x", __FILE__, line,rm); + cx = (unsigned char *)&x; + cr = (unsigned char *)&r; + cxx = (unsigned char *)&xx; + for (i = 0; i < sizeof(tocheck_t); i++) + printf("%02x", cx[i]); + printf(" r=0x"); + for (i = 0; i < sizeof(tocheck_t); i++) + printf("%02x", cr[i]); + printf(" xx=0x"); + for (i = 0; i < sizeof(tocheck_t); i++) + printf("%02x", cxx[i]); + printf(" inexact=%d\n", xflag != 0); + nerrors++; +} + +static void +check_sqrt(tocheck_t a) +{ + int j; + tocheck_t r0, r1, r2, x0, x1, x2; + int raised = 0; + int ok; + + for (j = 0; j < 4; j++) + { + int excepts; + + fesetenv(rmodes+j); + r1 = FUNC(sqrt)(a); + excepts = fetestexcept(all_exceptions); + fesetenv(FE_DFL_ENV); + raised |= excepts & ~FE_INEXACT; + x1 = r1 * r1 - a; + if (excepts & FE_INEXACT) + { + r0 = delta(r1,-1); r2 = delta(r1,1); + switch (1 << j) + { + case R_NEAREST: + x0 = r0 * r0 - a; x2 = r2 * r2 - a; + ok = fabs(x0) >= fabs(x1) && fabs(x1) <= fabs(x2); + break; + case R_ZERO: case R_DOWN: + x2 = r2 * r2 - a; + ok = x1 <= 0 && x2 >= 0; + break; + case R_UP: + x0 = r0 * r0 - a; + ok = x1 >= 0 && x0 <= 0; + break; + default: + assert(0); + } + } + else + ok = x1 == 0; + if (!ok) + fail_xr(__LINE__,rmnames[j],a,r1,x1,excepts&FE_INEXACT); + } + check_excepts(__LINE__,"all",0,raised); +} + +int main(int argc, char **argv) +{ + int i; + + _LIB_VERSION = _IEEE_; + + /* Set up environments for rounding modes. */ + fesetenv(FE_DFL_ENV); + fesetround(FE_TONEAREST); + fegetenv(rmodes+0); + fesetround(FE_TOWARDZERO); + fegetenv(rmodes+1); + fesetround(FE_UPWARD); + fegetenv(rmodes+2); + fesetround(FE_DOWNWARD); + fegetenv(rmodes+3); + +#if defined(FE_INVALID_SOFTWARE) || defined(FE_INVALID_SQRT) + /* There's this really stupid feature of the 601... */ + fesetenv(FE_DFL_ENV); + feraiseexcept(FE_INVALID_SOFTWARE); + if (!fetestexcept(FE_INVALID_SOFTWARE)) + excepts_missing |= FE_INVALID_SOFTWARE; + fesetenv(FE_DFL_ENV); + feraiseexcept(FE_INVALID_SQRT); + if (!fetestexcept(FE_INVALID_SQRT)) + excepts_missing |= FE_INVALID_SQRT; +#endif + + check_op(); + for (i = 0; i < 100000; i++) + check_sqrt(pattern(0, P_Rno, P_R)); + for (i = 0; i < 100; i++) + check_sqrt(pattern(0, P_Z, P_R)); + check_sqrt(pattern(0,P_Z,P_Z1)); + + printf("%d errors.\n", nerrors); + return nerrors == 0 ? 0 : 1; +} diff --git a/REORG.TODO/sysdeps/powerpc/test-arithf.c b/REORG.TODO/sysdeps/powerpc/test-arithf.c new file mode 100644 index 0000000000..d78ec49009 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/test-arithf.c @@ -0,0 +1,6 @@ +typedef float tocheck_t; +#define ESIZE 8 +#define MSIZE 23 +#define FUNC(x) x##f + +#include "test-arith.c" diff --git a/REORG.TODO/sysdeps/powerpc/test-get_hwcap-static.c b/REORG.TODO/sysdeps/powerpc/test-get_hwcap-static.c new file mode 100644 index 0000000000..86d1ca355b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/test-get_hwcap-static.c @@ -0,0 +1,23 @@ +/* Check __ppc_get_hwcap() and __ppc_get_at_plaftorm() functionality. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Tests if the hwcap, hwcap2 and platform data are stored in the TCB. */ + +#define STATIC_TST_HWCAP 1 + +#include "test-get_hwcap.c" diff --git a/REORG.TODO/sysdeps/powerpc/test-get_hwcap.c b/REORG.TODO/sysdeps/powerpc/test-get_hwcap.c new file mode 100644 index 0000000000..d776310734 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/test-get_hwcap.c @@ -0,0 +1,177 @@ +/* Check __ppc_get_hwcap() and __ppc_get_at_plaftorm() functionality. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Tests if the hwcap, hwcap2 and platform data are stored in the TCB. */ + +#include <inttypes.h> +#include <stdio.h> +#include <stdint.h> +#include <pthread.h> + +#include <support/check.h> +#include <support/xthread.h> + +#include <sys/auxv.h> + +#include <dl-procinfo.h> + +#ifndef STATIC_TST_HWCAP +#undef PROCINFO_DECL +#include <dl-procinfo.c> +#endif + +/* Offsets copied from tcb-offsets.h. */ + +#ifdef __powerpc64__ +# define __TPREG "r13" +# define __HWCAPOFF -28776 +# define __ATPLATOFF -28764 +#else +# define __TPREG "r2" +# define __HWCAPOFF -28736 +# define __HWCAP2OFF -28732 +# define __ATPLATOFF -28724 +#endif + +uint64_t check_tcbhwcap (long tid) +{ + + uint32_t tcb_at_platform, at_platform; + uint64_t hwcap, hwcap2, tcb_hwcap; + const char *at_platform_string; + + /* Testing if the hwcap/hwcap2 data is correctly initialized by + TLS_TP_INIT. */ + + register unsigned long __tp __asm__ (__TPREG); + +#ifdef __powerpc64__ + __asm__ ("ld %0,%1(%2)\n" + : "=r" (tcb_hwcap) + : "i" (__HWCAPOFF), "b" (__tp)); +#else + uint64_t h1, h2; + + __asm__ ("lwz %0,%1(%2)\n" + : "=r" (h1) + : "i" (__HWCAPOFF), "b" (__tp)); + __asm__ ("lwz %0,%1(%2)\n" + : "=r" (h2) + : "i" (__HWCAP2OFF), "b" (__tp)); + tcb_hwcap = (h1 >> 32) << 32 | (h2 >> 32); +#endif + + hwcap = getauxval (AT_HWCAP); + hwcap2 = getauxval (AT_HWCAP2); + + /* hwcap contains only the latest supported ISA, the code checks which is + and fills the previous supported ones. This is necessary because the + same is done in hwcapinfo.c when setting the values that are copied to + the TCB. */ + + if (hwcap2 & PPC_FEATURE2_ARCH_2_07) + hwcap |= PPC_FEATURE_ARCH_2_06 + | PPC_FEATURE_ARCH_2_05 + | PPC_FEATURE_POWER5_PLUS + | PPC_FEATURE_POWER5 + | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_ARCH_2_06) + hwcap |= PPC_FEATURE_ARCH_2_05 + | PPC_FEATURE_POWER5_PLUS + | PPC_FEATURE_POWER5 + | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_ARCH_2_05) + hwcap |= PPC_FEATURE_POWER5_PLUS + | PPC_FEATURE_POWER5 + | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_POWER5_PLUS) + hwcap |= PPC_FEATURE_POWER5 + | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_POWER5) + hwcap |= PPC_FEATURE_POWER4; + + hwcap = (hwcap << 32) + hwcap2; + + if ( tcb_hwcap != hwcap ) + { + printf ("FAIL: __ppc_get_hwcap() - HWCAP is %" PRIx64 ". Should be %" + PRIx64 " for thread %ld.\n", tcb_hwcap, hwcap, tid); + return 1; + } + + /* Same test for the platform number. */ + __asm__ ("lwz %0,%1(%2)\n" + : "=r" (tcb_at_platform) + : "i" (__ATPLATOFF), "b" (__tp)); + + at_platform_string = (const char *) getauxval (AT_PLATFORM); + at_platform = _dl_string_platform (at_platform_string); + + if ( tcb_at_platform != at_platform ) + { + printf ("FAIL: __ppc_get_at_platform() - AT_PLATFORM is %x. Should be %x" + " for thread %ld\n", tcb_at_platform, at_platform, tid); + return 1; + } + + return 0; +} + +void *t1 (void *tid) +{ + if (check_tcbhwcap ((long) tid)) + { + pthread_exit (tid); + } + + pthread_exit (NULL); + +} + +static int +do_test (void) +{ + + pthread_t threads[2]; + pthread_attr_t attr; + pthread_attr_init (&attr); + pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_JOINABLE); + + long i = 0; + + /* Check for main. */ + if (check_tcbhwcap (i)) + { + return 1; + } + + /* Check for other thread. */ + i++; + threads[i] = xpthread_create (&attr, t1, (void *)i); + + pthread_attr_destroy (&attr); + TEST_VERIFY_EXIT (xpthread_join (threads[i]) == NULL); + + printf("PASS: HWCAP, HWCAP2 and AT_PLATFORM are correctly set in the TCB for" + " all threads.\n"); + + pthread_exit (NULL); + +} + +#include <support/test-driver.c> diff --git a/REORG.TODO/sysdeps/powerpc/test-gettimebase.c b/REORG.TODO/sysdeps/powerpc/test-gettimebase.c new file mode 100644 index 0000000000..0e8e2f00fc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/test-gettimebase.c @@ -0,0 +1,46 @@ +/* Check __ppc_get_timebase() for architecture changes + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Test if __ppc_get_timebase() is compatible with the current processor and if + it's changing between reads. A read failure might indicate a Power ISA or + binutils change. */ + +#include <inttypes.h> +#include <stdio.h> +#include <stdint.h> + +#include <sys/platform/ppc.h> + +static int +do_test (void) +{ + uint64_t t1, t2, t3; + t1 = __ppc_get_timebase (); + printf ("Time Base = %"PRIu64"\n", t1); + t2 = __ppc_get_timebase (); + printf ("Time Base = %"PRIu64"\n", t2); + t3 = __ppc_get_timebase (); + printf ("Time Base = %"PRIu64"\n", t3); + if (t1 != t2 && t1 != t3 && t2 != t3) + return 0; + + printf ("Fail: timebase reads should always be different.\n"); + return 1; +} + +#include <support/test-driver.c> diff --git a/REORG.TODO/sysdeps/powerpc/tls-macros.h b/REORG.TODO/sysdeps/powerpc/tls-macros.h new file mode 100644 index 0000000000..809ef5cea1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/tls-macros.h @@ -0,0 +1,3 @@ +#define __TLS_CALL_CLOBBERS \ + "0", "4", "5", "6", "7", "8", "9", "10", "11", "12", \ + "lr", "ctr", "cr0", "cr1", "cr5", "cr6", "cr7" diff --git a/REORG.TODO/sysdeps/powerpc/tst-set_ppr.c b/REORG.TODO/sysdeps/powerpc/tst-set_ppr.c new file mode 100644 index 0000000000..df8dda78e8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/tst-set_ppr.c @@ -0,0 +1,102 @@ +/* Test the implementation of __ppc_set_ppr_* functions. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <inttypes.h> +#include <stdio.h> +#include <stdint.h> +#include <sys/auxv.h> + +#include <sys/platform/ppc.h> + +#include <support/test-driver.h> + +#ifdef __powerpc64__ + typedef uint64_t ppr_t; +# define MFPPR "mfppr" + /* The thread priority value is obtained from bits 11:13. */ +# define EXTRACT_THREAD_PRIORITY(x) ((x >> 50) & 7) +#else + typedef uint32_t ppr_t; +# define MFPPR "mfppr32" + /* For 32-bit, the upper 32 bits of the Program Priority Register (PPR) + are used, so the thread priority value is obtained from bits 43:46. */ +# define EXTRACT_THREAD_PRIORITY(x) ((x >> 18) & 7) +#endif /* !__powerpc64__ */ + +/* Read the thread priority value set in the PPR. */ +static __inline__ ppr_t +get_thread_priority (void) +{ + /* Read the PPR. */ + ppr_t ppr; + asm volatile (MFPPR" %0" : "=r"(ppr)); + /* Return the thread priority value. */ + return EXTRACT_THREAD_PRIORITY (ppr); +} + +/* Check the thread priority bits of PPR are set as expected. */ +uint8_t +check_thread_priority (uint8_t expected) +{ + ppr_t actual = get_thread_priority (); + + if (actual != expected) + { + printf ("FAIL: Expected %"PRIu8" got %"PRIuMAX".\n", expected, + (uintmax_t) actual); + return 1; + } + printf ("PASS: Thread priority set to %"PRIu8" correctly.\n", expected); + return 0; +} + +/* The Power ISA 2.06 allows the following thread priorities for any + problem state program: low (2), medium low (3), and medium (4). + Power ISA 2.07b added very low (1). + Check whether the values set by __ppc_set_ppr_* are correct. */ +static int +do_test (void) +{ + /* Check for the minimum required Power ISA to run these tests. */ + if ((getauxval (AT_HWCAP) & PPC_FEATURE_ARCH_2_06) == 0) + { + printf ("Requires an environment that implements the Power ISA version" + " 2.06 or greater.\n"); + return EXIT_UNSUPPORTED; + } + + uint8_t rc = 0; + +#ifdef _ARCH_PWR8 + __ppc_set_ppr_very_low (); + rc |= check_thread_priority (1); +#endif /* _ARCH_PWR8 */ + + __ppc_set_ppr_low (); + rc |= check_thread_priority (2); + + __ppc_set_ppr_med_low (); + rc |= check_thread_priority (3); + + __ppc_set_ppr_med (); + rc |= check_thread_priority (4); + + return rc; +} + +#include <support/test-driver.c> diff --git a/REORG.TODO/sysdeps/powerpc/tst-stack-align.h b/REORG.TODO/sysdeps/powerpc/tst-stack-align.h new file mode 100644 index 0000000000..c01d0793ff --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/tst-stack-align.h @@ -0,0 +1,46 @@ +/* Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <stdint.h> + +#define TEST_STACK_ALIGN() \ + ({ \ + /* Altivec __vector int etc. needs 16byte aligned stack. \ + Instead of using altivec.h here, use aligned attribute instead. */ \ + struct _S \ + { \ + int _i __attribute__((aligned (16))); \ + int _j[3]; \ + } _s = { ._i = 18, ._j[0] = 19, ._j[1] = 20, ._j[2] = 21 }; \ + double _d = 12.0; \ + long double _ld = 15.0; \ + int _ret = 0; \ + printf ("__vector int: { %d, %d, %d, %d } %p %zu\n", _s._i, _s._j[0], \ + _s._j[1], _s._j[2], &_s, __alignof (_s)); \ + if ((((uintptr_t) &_s) & (__alignof (_s) - 1)) != 0) \ + _ret = 1; \ + \ + printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \ + if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \ + _ret = 1; \ + \ + printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \ + if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \ + _ret = 1; \ + _ret; \ + }) diff --git a/REORG.TODO/sysdeps/powerpc/tst-tlsopt-powerpc.c b/REORG.TODO/sysdeps/powerpc/tst-tlsopt-powerpc.c new file mode 100644 index 0000000000..8ae928a3f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/tst-tlsopt-powerpc.c @@ -0,0 +1,51 @@ +/* glibc test for __tls_get_addr optimization. */ +#include <stdio.h> + +#include "../../elf/tls-macros.h" +#include "dl-tls.h" + +/* common 'int' variable in TLS. */ +COMMON_INT_DEF(foo); + + +static int +do_test (void) +{ + int result = 0; + + /* Get variable using general dynamic model. */ + int *ap = TLS_GD (foo); + if (*ap != 0) + { + printf ("foo = %d\n", *ap); + result = 1; + } + + tls_index *tls_arg; +#ifdef __powerpc64__ + register unsigned long thread_pointer __asm__ ("r13"); + asm ("addi %0,2,foo@got@tlsgd" : "=r" (tls_arg)); +#else + register unsigned long thread_pointer __asm__ ("r2"); + asm ("bcl 20,31,1f\n1:\t" + "mflr %0\n\t" + "addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" + "addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" + "addi %0,%0,foo@got@tlsgd" : "=b" (tls_arg)); +#endif + + if (tls_arg->ti_module != 0) + { + printf ("tls_index not optimized, binutils too old?\n"); + result = 1; + } + else if (tls_arg->ti_offset + thread_pointer != (unsigned long) ap) + { + printf ("tls_index->ti_offset wrong value\n"); + result = 1; + } + + return result; +} + +#include <support/test-driver.c> |