Diffstat (limited to 'REORG.TODO/sysdeps/powerpc/powerpc64')
420 files changed, 35095 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/970/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/970/Implies new file mode 100644 index 0000000000..bedb20b65c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/970/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power4/fpu +powerpc/powerpc64/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/Implies new file mode 100644 index 0000000000..a8cae95f9d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/Implies @@ -0,0 +1 @@ +wordsize-64 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/Makefile new file mode 100644 index 0000000000..9d15db0328 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/Makefile @@ -0,0 +1,49 @@ +# Powerpc64 specific build options. +# this is ./sysdeps/powerpc/powerpc64/Makefile + +# Each TOC entry takes 8 bytes and the TOC holds up to 2^16 bytes, +# or 8192 entries. +# If -fpic is not specified, the latest gcc-3.2.1 now generates +# different code for call stubs (without the TOC reload). +# Shared objects need the TOC reload so specify -fpic. +ifeq (yes,$(build-shared)) +pic-ccflag = -fpic +endif + +# These flags prevent FPU or Altivec registers from being used, +# for code called in contexts that is not allowed to touch those registers. +# Stupid GCC requires us to pass all these ridiculous switches. We need to +# pass the -mno-* switches as well to prevent the compiler from attempting +# to emit altivec or vsx instructions, especially when the registers aren't +# available. +no-special-regs := $(sort $(foreach n,40 41 50 51 60 61 62 63 \ + $(foreach m,2 3 4 5 6 7 8 9, \ + 3$m 4$m 5$m),\ + -ffixed-$n)) \ + $(sort $(foreach n,$(foreach m,0 1 2 3 4 5 6 7 8 9,\ + $m 1$m 2$m) 30 31,\ + -ffixed-v$n)) \ + -ffixed-vrsave -ffixed-vscr -mno-altivec -mno-vsx + +# Need to prevent gcc from using fprs in code used during dynamic linking. + +CFLAGS-dl-runtime.os = $(no-special-regs) +CFLAGS-dl-lookup.os = $(no-special-regs) +CFLAGS-dl-misc.os = $(no-special-regs) +CFLAGS-rtld-mempcpy.os = $(no-special-regs) +CFLAGS-rtld-memmove.os = $(no-special-regs) +CFLAGS-rtld-memchr.os = $(no-special-regs) +CFLAGS-rtld-strnlen.os = $(no-special-regs) + +ifeq ($(subdir),elf) +# help gcc inline asm code from dl-machine.h ++cflags += -finline-limit=2000 +endif + +ifeq ($(subdir),gmon) +# The assembly functions assume that fp arg regs are not trashed. +# Compiling with -msoft-float ensures that fp regs are not used +# for moving memory around. +CFLAGS-mcount.c += $(no-special-regs) +sysdep_routines += ppc-mcount +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp-common.S new file mode 100644 index 0000000000..efda025b41 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp-common.S @@ -0,0 +1,183 @@ +/* longjmp for PowerPC64. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
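For reference, the nested $(foreach)/$(sort) machinery in the Makefile above emits one -ffixed option per floating-point and vector register. In GCC's hard-register numbering 32-63 name f0-f31, so no-special-regs expands to roughly:

    -ffixed-32 -ffixed-33 ... -ffixed-62 -ffixed-63
    -ffixed-v0 -ffixed-v1 ... -ffixed-v30 -ffixed-v31
    -ffixed-vrsave -ffixed-vscr -mno-altivec -mno-vsx

(abridged; the elided options continue the same sequences), i.e. every FPR and VR is pinned, and the -mno-altivec/-mno-vsx switches additionally stop the compiler from emitting Altivec or VSX instructions on its own.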
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#define _SETJMP_H +#ifdef __NO_VMX__ +# include <novmxsetjmp.h> +#else +# include <jmpbuf-offsets.h> +#endif + +#ifndef __NO_VMX__ + .section ".toc","aw" +.LC__dl_hwcap: +# ifdef SHARED +# if IS_IN (rtld) + /* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ + .tc _rtld_local_ro[TC],_rtld_local_ro +# else + .tc _rtld_global_ro[TC],_rtld_global_ro +# endif +# else + .tc _dl_hwcap[TC],_dl_hwcap +# endif + .section ".text" +#endif + + .machine "altivec" +ENTRY (__longjmp) + CALL_MCOUNT 2 +#ifndef __NO_VMX__ + ld r5,.LC__dl_hwcap@toc(r2) +# ifdef SHARED + /* Load _rtld-global._dl_hwcap. */ + ld r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5) +# else + ld r5,0(r5) /* Load extern _dl_hwcap. */ +# endif + andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(no_vmx) + la r5,((JB_VRS)*8)(3) + andi. r6,r5,0xf + lwz r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */ + mtspr VRSAVE,r0 + beq+ L(aligned_restore_vmx) + addi r6,r5,16 + lvsl v0,0,r5 + lvx v1,0,r5 + addi r5,r5,32 + lvx v21,0,r6 + vperm v20,v1,v21,v0 +# define load_misaligned_vmx_lo_loaded(loadvr,lovr,shiftvr,loadgpr,addgpr) \ + addi addgpr,addgpr,32; \ + lvx lovr,0,loadgpr; \ + vperm loadvr,loadvr,lovr,shiftvr; + load_misaligned_vmx_lo_loaded(v21,v22,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v22,v23,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v23,v24,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v24,v25,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v25,v26,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v26,v27,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v27,v28,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v28,v29,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v29,v30,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v30,v31,v0,r6,r5) + lvx v1,0,r5 + vperm v31,v31,v1,v0 + b L(no_vmx) +L(aligned_restore_vmx): + addi r6,r5,16 + lvx v20,0,r5 + addi r5,r5,32 + lvx v21,0,r6 + addi r6,r6,32 + lvx v22,0,r5 + addi r5,r5,32 + lvx v23,0,r6 + addi r6,r6,32 + lvx v24,0,r5 + addi r5,r5,32 + lvx v25,0,r6 + addi r6,r6,32 + lvx v26,0,r5 + addi r5,r5,32 + lvx v27,0,r6 + addi r6,r6,32 + lvx v28,0,r5 + addi r5,r5,32 + lvx v29,0,r6 + addi r6,r6,32 + lvx v30,0,r5 + lvx v31,0,r6 +L(no_vmx): +#endif +#if defined PTR_DEMANGLE || defined CHECK_SP + ld r22,(JB_GPR1*8)(r3) +#else + ld r1,(JB_GPR1*8)(r3) +#endif +#ifdef PTR_DEMANGLE +# ifdef CHECK_SP + PTR_DEMANGLE3 (r22, r22, r25) +# else + PTR_DEMANGLE3 (r1, r22, r25) +# endif +#endif +#ifdef CHECK_SP + CHECK_SP (r22) + mr r1,r22 +#endif + ld r2,(JB_GPR2*8)(r3) + ld r0,(JB_LR*8)(r3) + ld r14,((JB_GPRS+0)*8)(r3) + lfd fp14,((JB_FPRS+0)*8)(r3) +#if defined SHARED && !IS_IN (rtld) + std r2,FRAME_TOC_SAVE(r1) /* Restore the callers TOC save area. */ +#endif + ld r15,((JB_GPRS+1)*8)(r3) + lfd fp15,((JB_FPRS+1)*8)(r3) + ld r16,((JB_GPRS+2)*8)(r3) + lfd fp16,((JB_FPRS+2)*8)(r3) + ld r17,((JB_GPRS+3)*8)(r3) + lfd fp17,((JB_FPRS+3)*8)(r3) + ld r18,((JB_GPRS+4)*8)(r3) + lfd fp18,((JB_FPRS+4)*8)(r3) + ld r19,((JB_GPRS+5)*8)(r3) + lfd fp19,((JB_FPRS+5)*8)(r3) + ld r20,((JB_GPRS+6)*8)(r3) + lfd fp20,((JB_FPRS+6)*8)(r3) +#ifdef PTR_DEMANGLE + PTR_DEMANGLE2 (r0, r25) +#endif + /* longjmp/longjmp_target probe expects longjmp first argument (8@3), + second argument (-4@4), and target address (8@0), respectively. */ + LIBC_PROBE (longjmp, 3, 8@3, -4@4, 8@0) + mtlr r0 +/* std r2,FRAME_TOC_SAVE(r1) Restore the TOC save area. 
*/ + ld r21,((JB_GPRS+7)*8)(r3) + lfd fp21,((JB_FPRS+7)*8)(r3) + ld r22,((JB_GPRS+8)*8)(r3) + lfd fp22,((JB_FPRS+8)*8)(r3) + lwz r5,((JB_CR*8)+4)(r3) /* 32-bit CR. */ + ld r23,((JB_GPRS+9)*8)(r3) + lfd fp23,((JB_FPRS+9)*8)(r3) + ld r24,((JB_GPRS+10)*8)(r3) + lfd fp24,((JB_FPRS+10)*8)(r3) + ld r25,((JB_GPRS+11)*8)(r3) + lfd fp25,((JB_FPRS+11)*8)(r3) + mtcrf 0xFF,r5 + ld r26,((JB_GPRS+12)*8)(r3) + lfd fp26,((JB_FPRS+12)*8)(r3) + ld r27,((JB_GPRS+13)*8)(r3) + lfd fp27,((JB_FPRS+13)*8)(r3) + ld r28,((JB_GPRS+14)*8)(r3) + lfd fp28,((JB_FPRS+14)*8)(r3) + ld r29,((JB_GPRS+15)*8)(r3) + lfd fp29,((JB_FPRS+15)*8)(r3) + ld r30,((JB_GPRS+16)*8)(r3) + lfd fp30,((JB_FPRS+16)*8)(r3) + ld r31,((JB_GPRS+17)*8)(r3) + lfd fp31,((JB_FPRS+17)*8)(r3) + LIBC_PROBE (longjmp_target, 3, 8@3, -4@4, 8@0) + mr r3,r4 + blr +END (__longjmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp.S new file mode 100644 index 0000000000..78659d012f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp.S @@ -0,0 +1,39 @@ +/* AltiVec/VMX (new) version of __longjmp for PowerPC64. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libc-symbols.h> +#include <rtld-global-offsets.h> +#include <shlib-compat.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +# include "__longjmp-common.S" + +#else /* IS_IN (libc) */ +strong_alias (__vmx__longjmp, __longjmp) +# define __longjmp __vmx__longjmp +# include "__longjmp-common.S" + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_3, GLIBC_2_3_4) +# define __NO_VMX__ +# undef __longjmp +# undef JB_SIZE +# define __longjmp __novmx__longjmp +# include "__longjmp-common.S" +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/a2/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/a2/memcpy.S new file mode 100644 index 0000000000..ff30898df5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/a2/memcpy.S @@ -0,0 +1,528 @@ +/* Optimized memcpy implementation for PowerPC A2. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Michael Brutman <brutman@us.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
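At the C level, the register restore in __longjmp above implements the usual contract: setjmp returns a second time with the value passed to longjmp (the final "mr r3,r4; blr" is that return value landing in r3). A minimal sketch of a caller:

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf env;

    static void fail (void)
    {
      longjmp (env, 42);   /* unwinds via __longjmp; 42 becomes setjmp's return */
    }

    int main (void)
    {
      int rc = setjmp (env);   /* 0 on the direct call, 42 after longjmp */
      if (rc == 0)
        fail ();
      printf ("resumed with %d\n", rc);
      return 0;
    }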
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +#define PREFETCH_AHEAD 4 /* no cache lines SRC prefetching ahead */ +#define ZERO_AHEAD 2 /* no cache lines DST zeroing ahead */ + + .section ".toc","aw" +.LC0: + .tc __cache_line_size[TC],__cache_line_size + .section ".text" + .align 2 + + + .machine a2 +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + dcbt 0,r4 /* Prefetch ONE SRC cacheline */ + cmpldi cr1,r5,16 /* is size < 16 ? */ + mr r6,r3 /* Copy dest reg to r6; */ + blt+ cr1,L(shortcopy) + + + /* Big copy (16 bytes or more) + + Figure out how far to the nearest quadword boundary, or if we are + on one already. Also get the cache line size. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + */ + + neg r8,r3 /* LS 4 bits = # bytes to 8-byte dest bdry */ + ld r9,.LC0@toc(r2) /* Get cache line size (part 1) */ + clrldi r8,r8,64-4 /* align to 16byte boundary */ + sub r7,r4,r3 /* compute offset to src from dest */ + lwz r9,0(r9) /* Get cache line size (part 2) */ + cmpldi cr0,r8,0 /* Were we aligned on a 16 byte bdy? */ + addi r10,r9,-1 /* Cache line mask */ + beq+ L(dst_aligned) + + + + /* Destination is not aligned on quadword boundary. Get us to one. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + r7 - offset to src from dest + r8 - number of bytes to quadword boundary + */ + + mtcrf 0x01,r8 /* put #bytes to boundary into cr7 */ + subf r5,r8,r5 /* adjust remaining len */ + + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte addr */ + stb r0,0(r6) + addi r6,r6,1 +1: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte addr */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte addr */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+0,8f + ldx r0,r7,r6 /* copy 8 byte addr */ + std r0,0(r6) + addi r6,r6,8 +8: + add r4,r7,r6 /* update src addr */ + + + + /* Dest is quadword aligned now. + + Lots of decisions to make. If we are copying less than a cache + line we won't be here long. If we are not on a cache line + boundary we need to get there. And then we need to figure out + how many cache lines ahead to pre-touch. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + */ + + + .align 4 +L(dst_aligned): + cmpdi cr0,r9,0 /* Cache line size set? */ + bne+ cr0,L(cachelineset) + +/* __cache_line_size not set: generic byte copy without much optimization */ + clrldi. r0,r5,63 /* If length is odd copy one byte */ + beq L(cachelinenotset_align) + lbz r7,0(r4) /* Read one byte from source */ + addi r5,r5,-1 /* Update length */ + addi r4,r4,1 /* Update source pointer address */ + stb r7,0(r6) /* Store one byte at dest */ + addi r6,r6,1 /* Update dest pointer address */ +L(cachelinenotset_align): + cmpdi cr7,r5,0 /* If length is 0 return */ + beqlr cr7 + ori r2,r2,0 /* Force a new dispatch group */ +L(cachelinenotset_loop): + addic. r5,r5,-2 /* Update length */ + lbz r7,0(r4) /* Load 2 bytes from source */ + lbz r8,1(r4) + addi r4,r4,2 /* Update source pointer address */ + stb r7,0(r6) /* Store 2 bytes on dest */ + stb r8,1(r6) + addi r6,r6,2 /* Update dest pointer address */ + bne L(cachelinenotset_loop) + blr + + +L(cachelineset): + cmpd cr5,r5,r10 /* Less than a cacheline to go? */ + + neg r7,r6 /* How far to next cacheline bdy? 
*/ + + addi r6,r6,-8 /* prepare for stdu */ + cmpdi cr0,r9,128 + addi r4,r4,-8 /* prepare for ldu */ + + + ble+ cr5,L(lessthancacheline) + + beq- cr0,L(big_lines) /* 128 byte line code */ + + + + /* More than a cacheline left to go, and using 64 byte cachelines */ + + clrldi r7,r7,64-6 /* How far to next cacheline bdy? */ + + cmpldi cr6,r7,0 /* Are we on a cacheline bdy already? */ + + /* Reduce total len by what it takes to get to the next cache line */ + subf r5,r7,r5 + srdi r7,r7,4 /* How many qws to get to the line bdy? */ + + /* How many full cache lines to copy after getting to a line bdy? */ + srdi r10,r5,6 + + cmpldi r10,0 /* If no full cache lines to copy ... */ + li r11,0 /* number cachelines to copy with prefetch */ + beq L(nocacheprefetch) + + + /* We are here because we have at least one full cache line to copy, + and therefore some pre-touching to do. */ + + cmpldi r10,PREFETCH_AHEAD + li r12,64+8 /* prefetch distance */ + ble L(lessthanmaxprefetch) + + /* We can only do so much pre-fetching. R11 will have the count of + lines left to prefetch after the initial batch of prefetches + are executed. */ + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +L(lessthanmaxprefetch): + mtctr r10 + + /* At this point r10/ctr hold the number of lines to prefetch in this + initial batch, and r11 holds any remainder. */ + +L(prefetchSRC): + dcbt r12,r4 + addi r12,r12,64 + bdnz L(prefetchSRC) + + + /* Prefetching is done, or was not needed. + + cr6 - are we on a cacheline boundary already? + r7 - number of quadwords to the next cacheline boundary + */ + +L(nocacheprefetch): + mtctr r7 + + cmpldi cr1,r5,64 /* Less than a cache line to copy? */ + + /* How many bytes are left after we copy whatever full + cache lines we can get? */ + clrldi r5,r5,64-6 + + beq cr6,L(cachelinealigned) + + + /* Copy quadwords up to the next cacheline boundary */ + +L(aligntocacheline): + ld r9,0x08(r4) + ld r7,0x10(r4) + addi r4,r4,0x10 + std r9,0x08(r6) + stdu r7,0x10(r6) + bdnz L(aligntocacheline) + + + .align 4 +L(cachelinealigned): /* copy while cache lines */ + + blt- cr1,L(lessthancacheline) /* size <64 */ + +L(outerloop): + cmpdi r11,0 + mtctr r11 + beq- L(endloop) + + li r11,64*ZERO_AHEAD +8 /* DCBZ dist */ + + .align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +L(loop): /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + ld r9, 0x08(r4) + dcbz r11,r6 + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + addi r4, r4,0x40 + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + stdu r0, 0x40(r6) + + bdnz L(loop) + + +L(endloop): + cmpdi r10,0 + beq- L(endloop2) + mtctr r10 + +L(loop2): /* Copy aligned body */ + ld r9, 0x08(r4) + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + addi r4, r4,0x40 + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + stdu r0, 0x40(r6) + + bdnz L(loop2) +L(endloop2): + + + .align 4 +L(lessthancacheline): /* Was there less than cache to do ? */ + cmpldi cr0,r5,16 + srdi r7,r5,4 /* divide size by 16 */ + blt- L(do_lt16) + mtctr r7 + +L(copy_remaining): + ld r8,0x08(r4) + ld r7,0x10(r4) + addi r4,r4,0x10 + std r8,0x08(r6) + stdu r7,0x10(r6) + bdnz L(copy_remaining) + +L(do_lt16): /* less than 16 ? 
*/ + cmpldi cr0,r5,0 /* copy remaining bytes (0-15) */ + beqlr+ /* no rest to copy */ + addi r4,r4,8 + addi r6,r6,8 + +L(shortcopy): /* SIMPLE COPY to handle size =< 15 bytes */ + mtcrf 0x01,r5 + sub r7,r4,r6 + bf- cr7*4+0,8f + ldx r0,r7,r6 /* copy 8 byte */ + std r0,0(r6) + addi r6,r6,8 +8: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) +1: + blr + + + + + + /* Similar to above, but for use with 128 byte lines. */ + + +L(big_lines): + + clrldi r7,r7,64-7 /* How far to next cacheline bdy? */ + + cmpldi cr6,r7,0 /* Are we on a cacheline bdy already? */ + + /* Reduce total len by what it takes to get to the next cache line */ + subf r5,r7,r5 + srdi r7,r7,4 /* How many qws to get to the line bdy? */ + + /* How many full cache lines to copy after getting to a line bdy? */ + srdi r10,r5,7 + + cmpldi r10,0 /* If no full cache lines to copy ... */ + li r11,0 /* number cachelines to copy with prefetch */ + beq L(nocacheprefetch_128) + + + /* We are here because we have at least one full cache line to copy, + and therefore some pre-touching to do. */ + + cmpldi r10,PREFETCH_AHEAD + li r12,128+8 /* prefetch distance */ + ble L(lessthanmaxprefetch_128) + + /* We can only do so much pre-fetching. R11 will have the count of + lines left to prefetch after the initial batch of prefetches + are executed. */ + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +L(lessthanmaxprefetch_128): + mtctr r10 + + /* At this point r10/ctr hold the number of lines to prefetch in this + initial batch, and r11 holds any remainder. */ + +L(prefetchSRC_128): + dcbt r12,r4 + addi r12,r12,128 + bdnz L(prefetchSRC_128) + + + /* Prefetching is done, or was not needed. + + cr6 - are we on a cacheline boundary already? + r7 - number of quadwords to the next cacheline boundary + */ + +L(nocacheprefetch_128): + mtctr r7 + + cmpldi cr1,r5,128 /* Less than a cache line to copy? */ + + /* How many bytes are left after we copy whatever full + cache lines we can get? 
*/ + clrldi r5,r5,64-7 + + beq cr6,L(cachelinealigned_128) + + + /* Copy quadwords up to the next cacheline boundary */ + +L(aligntocacheline_128): + ld r9,0x08(r4) + ld r7,0x10(r4) + addi r4,r4,0x10 + std r9,0x08(r6) + stdu r7,0x10(r6) + bdnz L(aligntocacheline_128) + + +L(cachelinealigned_128): /* copy while cache lines */ + + blt- cr1,L(lessthancacheline) /* size <128 */ + +L(outerloop_128): + cmpdi r11,0 + mtctr r11 + beq- L(endloop_128) + + li r11,128*ZERO_AHEAD +8 /* DCBZ dist */ + + .align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +L(loop_128): /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + ld r9, 0x08(r4) + dcbz r11,r6 + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + std r0, 0x40(r6) + ld r9, 0x48(r4) + ld r7, 0x50(r4) + ld r8, 0x58(r4) + ld r0, 0x60(r4) + std r9, 0x48(r6) + std r7, 0x50(r6) + std r8, 0x58(r6) + std r0, 0x60(r6) + ld r9, 0x68(r4) + ld r7, 0x70(r4) + ld r8, 0x78(r4) + ld r0, 0x80(r4) + addi r4, r4,0x80 + std r9, 0x68(r6) + std r7, 0x70(r6) + std r8, 0x78(r6) + stdu r0, 0x80(r6) + + bdnz L(loop_128) + + +L(endloop_128): + cmpdi r10,0 + beq- L(endloop2_128) + mtctr r10 + +L(loop2_128): /* Copy aligned body */ + ld r9, 0x08(r4) + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + std r0, 0x40(r6) + ld r9, 0x48(r4) + ld r7, 0x50(r4) + ld r8, 0x58(r4) + ld r0, 0x60(r4) + std r9, 0x48(r6) + std r7, 0x50(r6) + std r8, 0x58(r6) + std r0, 0x60(r6) + ld r9, 0x68(r4) + ld r7, 0x70(r4) + ld r8, 0x78(r4) + ld r0, 0x80(r4) + addi r4, r4,0x80 + std r9, 0x68(r6) + std r7, 0x70(r6) + std r8, 0x78(r6) + stdu r0, 0x80(r6) + + bdnz L(loop2_128) +L(endloop2_128): + + b L(lessthancacheline) + + +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/addmul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc64/addmul_1.S new file mode 100644 index 0000000000..b4b052141d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/addmul_1.S @@ -0,0 +1,208 @@ +/* PowerPC64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add + the result to a second limb vector. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
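Stripped of the unrolling and the dcbz trick, the cache-line loop of the A2 memcpy above follows a simple prefetch-ahead pattern. A rough C sketch of the strategy (an illustration under the stated assumptions, not the actual implementation; __builtin_prefetch stands in for dcbt, and the real code also uses dcbz so the destination lines need not be read):

    #include <stddef.h>
    #include <stdint.h>

    #define LINE 64            /* or 128, selected at run time above */
    #define PREFETCH_AHEAD 4

    static void
    copy_lines (uint64_t *dst, const uint64_t *src, size_t nlines)
    {
      for (size_t i = 0; i < nlines; i++)
        {
          /* Touch a source line several iterations ahead to hide latency.  */
          if (i + PREFETCH_AHEAD < nlines)
            __builtin_prefetch (src + (i + PREFETCH_AHEAD) * (LINE / 8), 0, 0);
          for (size_t j = 0; j < LINE / 8; j++)
            dst[i * (LINE / 8) + j] = src[i * (LINE / 8) + j];
        }
    }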
*/ + +#include <sysdep.h> + +#ifdef USE_AS_SUBMUL +# define FUNC __mpn_submul_1 +# define ADDSUBC subfe +# define ADDSUB subfc +#else +# define FUNC __mpn_addmul_1 +# define ADDSUBC adde +# define ADDSUB addc +#endif + +#define RP r3 +#define UP r4 +#define N r5 +#define VL r6 + +EALIGN(FUNC, 5, 0) + std r31, -8(r1) + rldicl. r0, N, 0, 62 + std r30, -16(r1) + cmpdi VL, r0, 2 + std r29, -24(r1) + addi N, N, 3 + std r28, -32(r1) + srdi N, N, 2 + std r27, -40(r1) + mtctr N + beq cr0, L(b00) + blt cr6, L(b01) + beq cr6, L(b10) + +L(b11): ld r9, 0(UP) + ld r28, 0(RP) + mulld r0, r9, VL + mulhdu r12, r9, VL + ADDSUB r0, r0, r28 + std r0, 0(RP) + addi RP, RP, 8 + ld r9, 8(UP) + ld r27, 16(UP) + addi UP, UP, 24 +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 +#endif + b L(bot) + + .align 4 +L(b00): ld r9, 0(UP) + ld r27, 8(UP) + ld r28, 0(RP) + ld r29, 8(RP) + mulld r0, r9, VL + mulhdu N, r9, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + addc r7, r7, N + addze r12, r8 + ADDSUB r0, r0, r28 + std r0, 0(RP) + ADDSUBC r7, r7, r29 + std r7, 8(RP) + addi RP, RP, 16 + ld r9, 16(UP) + ld r27, 24(UP) + addi UP, UP, 32 +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 +#endif + b L(bot) + + .align 4 +L(b01): bdnz L(gt1) + ld r9, 0(UP) + ld r11, 0(RP) + mulld r0, r9, VL + mulhdu r8, r9, VL + ADDSUB r0, r0, r11 + std r0, 0(RP) +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 + addic r11, r11, 1 +#endif + addze RP, r8 + blr + +L(gt1): ld r9, 0(UP) + ld r27, 8(UP) + mulld r0, r9, VL + mulhdu N, r9, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + ld r9, 16(UP) + ld r28, 0(RP) + ld r29, 8(RP) + ld r30, 16(RP) + mulld r11, r9, VL + mulhdu r10, r9, VL + addc r7, r7, N + adde r11, r11, r8 + addze r12, r10 + ADDSUB r0, r0, r28 + std r0, 0(RP) + ADDSUBC r7, r7, r29 + std r7, 8(RP) + ADDSUBC r11, r11, r30 + std r11, 16(RP) + addi RP, RP, 24 + ld r9, 24(UP) + ld r27, 32(UP) + addi UP, UP, 40 +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 +#endif + b L(bot) + +L(b10): addic r0, r0, r0 + li r12, 0 + ld r9, 0(UP) + ld r27, 8(UP) + bdz L(end) + addi UP, UP, 16 + + .align 4 +L(top): mulld r0, r9, VL + mulhdu N, r9, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + ld r9, 0(UP) + ld r28, 0(RP) + ld r27, 8(UP) + ld r29, 8(RP) + adde r0, r0, r12 + adde r7, r7, N + mulld N, r9, VL + mulhdu r10, r9, VL + mulld r11, r27, VL + mulhdu r12, r27, VL + ld r9, 16(UP) + ld r30, 16(RP) + ld r27, 24(UP) + ld r31, 24(RP) + adde N, N, r8 + adde r11, r11, r10 + addze r12, r12 + ADDSUB r0, r0, r28 + std r0, 0(RP) + ADDSUBC r7, r7, r29 + std r7, 8(RP) + ADDSUBC N, N, r30 + std N, 16(RP) + ADDSUBC r11, r11, r31 + std r11, 24(RP) + addi UP, UP, 32 +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 +#endif + addi RP, RP, 32 +L(bot): +#ifdef USE_AS_SUBMUL + addic r11, r11, 1 +#endif + bdnz L(top) + +L(end): mulld r0, r9, VL + mulhdu N, r9, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + ld r28, 0(RP) + ld r29, 8(RP) + adde r0, r0, r12 + adde r7, r7, N + addze r8, r8 + ADDSUB r0, r0, r28 + std r0, 0(RP) + ADDSUBC r7, r7, r29 + std r7, 8(RP) +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 + addic r11, r11, 1 +#endif + addze RP, r8 + ld r31, -8(r1) + ld r30, -16(r1) + ld r29, -24(r1) + ld r28, -32(r1) + ld r27, -40(r1) + blr +END(FUNC) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/atomic-machine.h b/REORG.TODO/sysdeps/powerpc/powerpc64/atomic-machine.h new file mode 100644 index 0000000000..46df488b3c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/atomic-machine.h @@ -0,0 +1,242 @@ +/* Atomic operations. PowerPC64 version. 
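The unrolled limb loop in addmul_1.S above computes the classic GMP primitive. As a reference, the whole function reduces to this portable C (assuming 64-bit limbs and a compiler with unsigned __int128; with USE_AS_SUBMUL the same skeleton builds the subtracting variant __mpn_submul_1):

    #include <stddef.h>
    #include <stdint.h>

    /* rp[0..n-1] += up[0..n-1] * vl; returns the final carry limb.  */
    static uint64_t
    addmul_1_ref (uint64_t *rp, const uint64_t *up, size_t n, uint64_t vl)
    {
      uint64_t carry = 0;
      for (size_t i = 0; i < n; i++)
        {
          unsigned __int128 t = (unsigned __int128) up[i] * vl + rp[i] + carry;
          rp[i] = (uint64_t) t;
          carry = (uint64_t) (t >> 64);
        }
      return carry;
    }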
+ Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* POWER6 adds a "Mutex Hint" to the Load and Reserve instruction. + This is a hint to the hardware to expect additional updates adjacent + to the lock word or not. If we are acquiring a Mutex, the hint + should be true. Otherwise we releasing a Mutex or doing a simple + atomic operation. In that case we don't expect additional updates + adjacent to the lock word after the Store Conditional and the hint + should be false. */ + +#if defined _ARCH_PWR6 || defined _ARCH_PWR6X +# define MUTEX_HINT_ACQ ",1" +# define MUTEX_HINT_REL ",0" +#else +# define MUTEX_HINT_ACQ +# define MUTEX_HINT_REL +#endif + +#define __HAVE_64B_ATOMICS 1 +#define USE_ATOMIC_COMPILER_BUILTINS 0 +#define ATOMIC_EXCHANGE_USES_CAS 1 + +/* The 32-bit exchange_bool is different on powerpc64 because the subf + does signed 64-bit arithmetic while the lwarx is 32-bit unsigned + (a load word and zero (high 32) form) load. + In powerpc64 register values are 64-bit by default, including oldval. + The value in old val unknown sign extension, lwarx loads the 32-bit + value as unsigned. So we explicitly clear the high 32 bits in oldval. */ +#define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \ +({ \ + unsigned int __tmp, __tmp2; \ + __asm __volatile (" clrldi %1,%1,32\n" \ + "1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \ + " subf. %0,%1,%0\n" \ + " bne 2f\n" \ + " stwcx. %4,0,%2\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp), "=r" (__tmp2) \ + : "b" (mem), "1" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp != 0; \ +}) + +/* + * Only powerpc64 processors support Load doubleword and reserve index (ldarx) + * and Store doubleword conditional indexed (stdcx) instructions. So here + * we define the 64-bit forms. + */ +#define __arch_compare_and_exchange_bool_64_acq(mem, newval, oldval) \ +({ \ + unsigned long __tmp; \ + __asm __volatile ( \ + "1: ldarx %0,0,%1" MUTEX_HINT_ACQ "\n" \ + " subf. %0,%2,%0\n" \ + " bne 2f\n" \ + " stdcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp) \ + : "b" (mem), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp != 0; \ +}) + +#define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __tmp; \ + __typeof (mem) __memp = (mem); \ + __asm __volatile ( \ + "1: ldarx %0,0,%1" MUTEX_HINT_ACQ "\n" \ + " cmpd %0,%2\n" \ + " bne 2f\n" \ + " stdcx. 
%3,0,%1\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp) \ + : "b" (__memp), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp; \ + }) + +#define __arch_compare_and_exchange_val_64_rel(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __tmp; \ + __typeof (mem) __memp = (mem); \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: ldarx %0,0,%1" MUTEX_HINT_REL "\n" \ + " cmpd %0,%2\n" \ + " bne 2f\n" \ + " stdcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " \ + : "=&r" (__tmp) \ + : "b" (__memp), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp; \ + }) + +#define __arch_atomic_exchange_64_acq(mem, value) \ + ({ \ + __typeof (*mem) __val; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: ldarx %0,0,%2" MUTEX_HINT_ACQ "\n" \ + " stdcx. %3,0,%2\n" \ + " bne- 1b\n" \ + " " __ARCH_ACQ_INSTR \ + : "=&r" (__val), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_64_rel(mem, value) \ + ({ \ + __typeof (*mem) __val; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: ldarx %0,0,%2" MUTEX_HINT_REL "\n" \ + " stdcx. %3,0,%2\n" \ + " bne- 1b" \ + : "=&r" (__val), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_64(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile ("1: ldarx %0,0,%3\n" \ + " add %1,%0,%4\n" \ + " stdcx. %1,0,%3\n" \ + " bne- 1b" \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_64_acq(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile ("1: ldarx %0,0,%3" MUTEX_HINT_ACQ "\n" \ + " add %1,%0,%4\n" \ + " stdcx. %1,0,%3\n" \ + " bne- 1b\n" \ + __ARCH_ACQ_INSTR \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_64_rel(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: ldarx %0,0,%3" MUTEX_HINT_REL "\n" \ + " add %1,%0,%4\n" \ + " stdcx. %1,0,%3\n" \ + " bne- 1b" \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_increment_val_64(mem) \ + ({ \ + __typeof (*(mem)) __val; \ + __asm __volatile ("1: ldarx %0,0,%2\n" \ + " addi %0,%0,1\n" \ + " stdcx. %0,0,%2\n" \ + " bne- 1b" \ + : "=&b" (__val), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_decrement_val_64(mem) \ + ({ \ + __typeof (*(mem)) __val; \ + __asm __volatile ("1: ldarx %0,0,%2\n" \ + " subi %0,%0,1\n" \ + " stdcx. %0,0,%2\n" \ + " bne- 1b" \ + : "=&b" (__val), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_decrement_if_positive_64(mem) \ + ({ int __val, __tmp; \ + __asm __volatile ("1: ldarx %0,0,%3\n" \ + " cmpdi 0,%0,0\n" \ + " addi %1,%0,-1\n" \ + " ble 2f\n" \ + " stdcx. %1,0,%3\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +/* + * All powerpc64 processors support the new "light weight" sync (lwsync). + */ +#define atomic_read_barrier() __asm ("lwsync" ::: "memory") +/* + * "light weight" sync can also be used for the release barrier. 
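For comparison, the 64-bit acquire CAS above behaves, on the success path, like this standalone version built on the GCC __atomic builtins (essentially what USE_ATOMIC_COMPILER_BUILTINS would select were it enabled). Note the inverted return convention of the _bool_ macros, nonzero meaning the exchange failed:

    #include <stdint.h>

    static inline int
    cas_bool_64_acq (uint64_t *mem, uint64_t newval, uint64_t oldval)
    {
      /* Nonzero on failure, matching
         __arch_compare_and_exchange_bool_64_acq.  */
      return !__atomic_compare_exchange_n (mem, &oldval, newval, 0,
                                           __ATOMIC_ACQUIRE,
                                           __ATOMIC_RELAXED);
    }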
+ */ +#ifndef UP +# define __ARCH_REL_INSTR "lwsync" +#endif +#define atomic_write_barrier() __asm ("lwsync" ::: "memory") + +/* + * Include the rest of the atomic ops macros which are common to both + * powerpc32 and powerpc64. + */ +#include_next <atomic-machine.h> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/backtrace.c b/REORG.TODO/sysdeps/powerpc/powerpc64/backtrace.c new file mode 100644 index 0000000000..723948d78f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/backtrace.c @@ -0,0 +1,104 @@ +/* Return backtrace of current program state. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> +#include <string.h> +#include <signal.h> +#include <stdint.h> + +#include <execinfo.h> +#include <libc-vdso.h> + +/* This is the stack layout we see with every stack frame. + Note that every routine is required by the ABI to lay out the stack + like this. + + +----------------+ +-----------------+ + %r1 -> | %r1 last frame--------> | %r1 last frame--->... --> NULL + | | | | + | cr save | | cr save | + | | | | + | (unused) | | return address | + +----------------+ +-----------------+ +*/ +struct layout +{ + struct layout *next; + long int condition_register; + void *return_address; +}; + +/* Since the signal handler is just like any other function it needs to + save/restore its LR and it will save it into callers stack frame. + Since a signal handler doesn't have a caller, the kernel creates a + dummy frame to make it look like it has a caller. */ +struct signal_frame_64 { +#define SIGNAL_FRAMESIZE 128 + char dummy[SIGNAL_FRAMESIZE]; + struct ucontext uc; + /* We don't care about the rest, since the IP value is at 'uc' field. */ +}; + +static inline int +is_sigtramp_address (void *nip) +{ +#ifdef SHARED + if (nip == VDSO_SYMBOL (sigtramp_rt64)) + return 1; +#endif + return 0; +} + +int +__backtrace (void **array, int size) +{ + struct layout *current; + int count; + + /* Force gcc to spill LR. */ + asm volatile ("" : "=l"(current)); + + /* Get the address on top-of-stack. */ + asm volatile ("ld %0,0(1)" : "=r"(current)); + + for ( count = 0; + current != NULL && count < size; + current = current->next, count++) + { + array[count] = current->return_address; + + /* Check if the symbol is the signal trampoline and get the interrupted + * symbol address from the trampoline saved area. */ + if (is_sigtramp_address (current->return_address)) + { + struct signal_frame_64 *sigframe = (struct signal_frame_64*) current; + array[++count] = (void*) sigframe->uc.uc_mcontext.gp_regs[PT_NIP]; + current = (void*) sigframe->uc.uc_mcontext.gp_regs[PT_R1]; + } + } + + /* It's possible the second-last stack frame can't return + (that is, it's __libc_start_main), in which case + the CRT startup code will have set its LR to 'NULL'. 
*/ + if (count > 0 && array[count-1] == NULL) + count--; + + return count; +} +weak_alias (__backtrace, backtrace) +libc_hidden_def (__backtrace) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/bits/wordsize.h b/REORG.TODO/sysdeps/powerpc/powerpc64/bits/wordsize.h new file mode 100644 index 0000000000..04ca9debf0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/bits/wordsize.h @@ -0,0 +1,11 @@ +/* Determine the wordsize from the preprocessor defines. */ + +#if defined __powerpc64__ +# define __WORDSIZE 64 +# define __WORDSIZE_TIME64_COMPAT32 1 +#else +# define __WORDSIZE 32 +# define __WORDSIZE_TIME64_COMPAT32 0 +# define __WORDSIZE32_SIZE_ULONG 0 +# define __WORDSIZE32_PTRDIFF_LONG 0 +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-_setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-_setjmp.S new file mode 100644 index 0000000000..86d49b1c6e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-_setjmp.S @@ -0,0 +1 @@ +/* _setjmp moved to setjmp-common.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-setjmp.S new file mode 100644 index 0000000000..38b734fcb4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-setjmp.S @@ -0,0 +1 @@ +/* setjmp moved to setjmp-common.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/bzero.S b/REORG.TODO/sysdeps/powerpc/powerpc64/bzero.S new file mode 100644 index 0000000000..41cfac5127 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/bzero.S @@ -0,0 +1,20 @@ +/* Optimized bzero `implementation' for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This code was moved into memset.S to solve a double stub call problem. + @local would have worked but it is not supported in PowerPC64 asm. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/cell/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/cell/memcpy.S new file mode 100644 index 0000000000..1cc66456e3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/cell/memcpy.S @@ -0,0 +1,246 @@ +/* Optimized memcpy implementation for CELL BE PowerPC. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +#define PREFETCH_AHEAD 6 /* no cache lines SRC prefetching ahead */ +#define ZERO_AHEAD 4 /* no cache lines DST zeroing ahead */ + +/* memcpy routine optimized for CELL-BE-PPC v2.0 + * + * The CELL PPC core has 1 integer unit and 1 load/store unit + * CELL: + * 1st level data cache = 32K + * 2nd level data cache = 512K + * 3rd level data cache = 0K + * With 3.2 GHz clockrate the latency to 2nd level cache is >36 clocks, + * latency to memory is >400 clocks + * To improve copy performance we need to prefetch source data + * far ahead to hide this latency + * For best performance instruction forms ending in "." like "andi." + * should be avoided as the are implemented in microcode on CELL. + * The below code is loop unrolled for the CELL cache line of 128 bytes + */ + +.align 7 + +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + dcbt 0,r4 /* Prefetch ONE SRC cacheline */ + cmpldi cr1,r5,16 /* is size < 16 ? */ + mr r6,r3 + blt+ cr1,.Lshortcopy + +.Lbigcopy: + neg r8,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ + clrldi r8,r8,64-4 /* align to 16byte boundary */ + sub r7,r4,r3 + cmpldi cr0,r8,0 + beq+ .Ldst_aligned + +.Ldst_unaligned: + mtcrf 0x01,r8 /* put #bytes to boundary into cr7 */ + subf r5,r8,r5 + + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) + addi r6,r6,1 +1: bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: bf cr7*4+0,8f + ldx r0,r7,r6 /* copy 8 byte */ + std r0,0(r6) + addi r6,r6,8 +8: + add r4,r7,r6 + +.Ldst_aligned: + + cmpdi cr5,r5,128-1 + + neg r7,r6 + addi r6,r6,-8 /* prepare for stdu */ + addi r4,r4,-8 /* prepare for ldu */ + + clrldi r7,r7,64-7 /* align to cacheline boundary */ + ble+ cr5,.Llessthancacheline + + cmpldi cr6,r7,0 + subf r5,r7,r5 + srdi r7,r7,4 /* divide size by 16 */ + srdi r10,r5,7 /* number of cache lines to copy */ + + cmpldi r10,0 + li r11,0 /* number cachelines to copy with prefetch */ + beq .Lnocacheprefetch + + cmpldi r10,PREFETCH_AHEAD + li r12,128+8 /* prefetch distance */ + ble .Llessthanmaxprefetch + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +.Llessthanmaxprefetch: + mtctr r10 + +.LprefetchSRC: + dcbt r12,r4 + addi r12,r12,128 + bdnz .LprefetchSRC + +.Lnocacheprefetch: + mtctr r7 + cmpldi cr1,r5,128 + clrldi r5,r5,64-7 + beq cr6,.Lcachelinealigned + +.Laligntocacheline: + ld r9,0x08(r4) + ldu r7,0x10(r4) + std r9,0x08(r6) + stdu r7,0x10(r6) + bdnz .Laligntocacheline + + +.Lcachelinealigned: /* copy while cache lines */ + + blt- cr1,.Llessthancacheline /* size <128 */ + +.Louterloop: + cmpdi r11,0 + mtctr r11 + beq- .Lendloop + + li r11,128*ZERO_AHEAD +8 /* DCBZ dist */ + +.align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +.Lloop: /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + ld r9, 0x08(r4) + dcbz r11,r6 + ld r7, 0x10(r4) /* 4 register stride copy is optimal */ + ld r8, 0x18(r4) /* to hide 1st level cache latency. 
*/ + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + std r0, 0x40(r6) + ld r9, 0x48(r4) + ld r7, 0x50(r4) + ld r8, 0x58(r4) + ld r0, 0x60(r4) + std r9, 0x48(r6) + std r7, 0x50(r6) + std r8, 0x58(r6) + std r0, 0x60(r6) + ld r9, 0x68(r4) + ld r7, 0x70(r4) + ld r8, 0x78(r4) + ldu r0, 0x80(r4) + std r9, 0x68(r6) + std r7, 0x70(r6) + std r8, 0x78(r6) + stdu r0, 0x80(r6) + + bdnz .Lloop + +.Lendloop: + cmpdi r10,0 + sldi r10,r10,2 /* adjust from 128 to 32 byte stride */ + beq- .Lendloop2 + mtctr r10 + +.Lloop2: /* Copy aligned body */ + ld r9, 0x08(r4) + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ldu r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + stdu r0, 0x20(r6) + + bdnz .Lloop2 +.Lendloop2: + +.Llessthancacheline: /* less than cache to do ? */ + cmpldi cr0,r5,16 + srdi r7,r5,4 /* divide size by 16 */ + blt- .Ldo_lt16 + mtctr r7 + +.Lcopy_remaining: + ld r8,0x08(r4) + ldu r7,0x10(r4) + std r8,0x08(r6) + stdu r7,0x10(r6) + bdnz .Lcopy_remaining + +.Ldo_lt16: /* less than 16 ? */ + cmpldi cr0,r5,0 /* copy remaining bytes (0-15) */ + beqlr+ /* no rest to copy */ + addi r4,r4,8 + addi r6,r6,8 + +.Lshortcopy: /* SIMPLE COPY to handle size =< 15 bytes */ + mtcrf 0x01,r5 + sub r7,r4,r6 + bf- cr7*4+0,8f + ldx r0,r7,r6 /* copy 8 byte */ + std r0,0(r6) + addi r6,r6,8 +8: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) +1: blr + +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/configure b/REORG.TODO/sysdeps/powerpc/powerpc64/configure new file mode 100644 index 0000000000..7632a7be04 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/configure @@ -0,0 +1,33 @@ +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/powerpc/powerpc64. + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for support for overlapping .opd entries" >&5 +$as_echo_n "checking for support for overlapping .opd entries... " >&6; } +if ${libc_cv_overlapping_opd+:} false; then : + $as_echo_n "(cached) " >&6 +else + libc_cv_overlapping_opd=no +echo 'void foo (void) {}' > conftest.c +if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S conftest.c -o conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + if grep '\.TOC\.@tocbase' conftest.s > /dev/null; then + if grep '\.TOC\.@tocbase[ ]*,[ ]*0' conftest.s > /dev/null; then + : + else + libc_cv_overlapping_opd=yes + fi + fi +fi +rm -f conftest.c conftest.s + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_overlapping_opd" >&5 +$as_echo "$libc_cv_overlapping_opd" >&6; } +if test x$libc_cv_overlapping_opd = xyes; then + $as_echo "#define USE_PPC64_OVERLAPPING_OPD 1" >>confdefs.h + +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/configure.ac b/REORG.TODO/sysdeps/powerpc/powerpc64/configure.ac new file mode 100644 index 0000000000..f309ba35a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/configure.ac @@ -0,0 +1,23 @@ +GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. 
+# Local configure fragment for sysdeps/powerpc/powerpc64. + +AC_CACHE_CHECK(for support for overlapping .opd entries, +libc_cv_overlapping_opd, [dnl +libc_cv_overlapping_opd=no +echo 'void foo (void) {}' > conftest.c +if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS -S conftest.c -o conftest.s 1>&AS_MESSAGE_LOG_FD); then +changequote(,)dnl + if grep '\.TOC\.@tocbase' conftest.s > /dev/null; then + if grep '\.TOC\.@tocbase[ ]*,[ ]*0' conftest.s > /dev/null; then + : + else + libc_cv_overlapping_opd=yes + fi + fi +changequote([,])dnl +fi +rm -f conftest.c conftest.s +]) +if test x$libc_cv_overlapping_opd = xyes; then + AC_DEFINE(USE_PPC64_OVERLAPPING_OPD) +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/crti.S b/REORG.TODO/sysdeps/powerpc/powerpc64/crti.S new file mode 100644 index 0000000000..fa4f5833ef --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/crti.S @@ -0,0 +1,88 @@ +/* Special .init and .fini section support for PowerPC64. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crti.S puts a function prologue at the beginning of the .init and + .fini sections and defines global symbols for those addresses, so + they can be called as functions. The symbols _init and _fini are + magic and cause the linker to emit DT_INIT and DT_FINI. 
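For context, user code normally reaches the crti.S/crtn.S machinery indirectly. On current toolchains constructor/destructor attributes go through .init_array/.fini_array rather than _init/_fini themselves, but the effect is the same kind of pre-main/post-exit hook:

    #include <stdio.h>

    __attribute__ ((constructor))
    static void setup (void)
    {
      puts ("runs before main, via the init machinery");
    }

    __attribute__ ((destructor))
    static void teardown (void)
    {
      puts ("runs after exit, via the fini machinery");
    }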
*/ + +#include <libc-symbols.h> +#include <sysdep.h> + +#ifndef PREINIT_FUNCTION +# define PREINIT_FUNCTION __gmon_start__ +#endif + +#ifndef PREINIT_FUNCTION_WEAK +# define PREINIT_FUNCTION_WEAK 1 +#endif + +#if PREINIT_FUNCTION_WEAK + weak_extern (PREINIT_FUNCTION) +#else + .hidden PREINIT_FUNCTION +#endif + +#if PREINIT_FUNCTION_WEAK + .section ".toc", "aw" +.LC0: + .tc PREINIT_FUNCTION[TC], PREINIT_FUNCTION +#endif + .section ".init", "ax", @progbits + ENTRY_2(_init) + .align ALIGNARG (2) +BODY_LABEL (_init): + LOCALENTRY(_init) + mflr 0 + std 0, FRAME_LR_SAVE(r1) + stdu r1, -FRAME_MIN_SIZE_PARM(r1) +#if PREINIT_FUNCTION_WEAK + addis r9, r2, .LC0@toc@ha + ld r0, .LC0@toc@l(r9) + cmpdi cr7, r0, 0 + beq+ cr7, 1f +#endif + bl JUMPTARGET (PREINIT_FUNCTION) + nop +1: + + .section ".fini", "ax", @progbits + ENTRY_2(_fini) + .align ALIGNARG (2) +BODY_LABEL (_fini): + LOCALENTRY(_fini) + mflr 0 + std 0, FRAME_LR_SAVE(r1) + stdu r1, -FRAME_MIN_SIZE_PARM(r1) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/crtn.S b/REORG.TODO/sysdeps/powerpc/powerpc64/crtn.S new file mode 100644 index 0000000000..a8906aa16c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/crtn.S @@ -0,0 +1,51 @@ +/* Special .init and .fini section support for PowerPC64. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crtn.S puts function epilogues in the .init and .fini sections + corresponding to the prologues in crti.S. 
*/ + +#include <sysdep.h> + + .section .init,"ax",@progbits + addi r1, r1, FRAME_MIN_SIZE_PARM + ld r0, FRAME_LR_SAVE(r1) + mtlr r0 + blr + + .section .fini,"ax",@progbits + addi r1, r1, FRAME_MIN_SIZE_PARM + ld r0, FRAME_LR_SAVE(r1) + mtlr r0 + blr diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-dtprocnum.h b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-dtprocnum.h new file mode 100644 index 0000000000..142714b421 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-dtprocnum.h @@ -0,0 +1,21 @@ +/* Configuration of lookup functions. PowerPC64 version. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Number of extra dynamic section entries for this architecture. By + default there are none. */ +#define DT_THISPROCNUM DT_PPC64_NUM diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-irel.h b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-irel.h new file mode 100644 index 0000000000..d8f5988bc9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-irel.h @@ -0,0 +1,63 @@ +/* Machine-dependent ELF indirect relocation inline functions. + PowerPC64 version. + Copyright (C) 2009-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _DL_IREL_H +#define _DL_IREL_H + +#include <stdio.h> +#include <unistd.h> +#include <ldsodefs.h> +#include <dl-machine.h> + +#define ELF_MACHINE_IRELA 1 + +static inline Elf64_Addr +__attribute ((always_inline)) +elf_ifunc_invoke (Elf64_Addr addr) +{ + return ((Elf64_Addr (*) (unsigned long int)) (addr)) (GLRO(dl_hwcap)); +} + +static inline void +__attribute ((always_inline)) +elf_irela (const Elf64_Rela *reloc) +{ + unsigned int r_type = ELF64_R_TYPE (reloc->r_info); + + if (__glibc_likely (r_type == R_PPC64_IRELATIVE)) + { + Elf64_Addr *const reloc_addr = (void *) reloc->r_offset; + Elf64_Addr value = elf_ifunc_invoke(reloc->r_addend); + *reloc_addr = value; + } + else if (__glibc_likely (r_type == R_PPC64_JMP_IREL)) + { + Elf64_Addr *const reloc_addr = (void *) reloc->r_offset; + Elf64_Addr value = elf_ifunc_invoke(reloc->r_addend); +#if _CALL_ELF != 2 + *(Elf64_FuncDesc *) reloc_addr = *(Elf64_FuncDesc *) value; +#else + *reloc_addr = value; +#endif + } + else + __libc_fatal ("unexpected reloc type in static binary"); +} + +#endif /* dl-irel.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.c b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.c new file mode 100644 index 0000000000..0eccc6621a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.c @@ -0,0 +1,47 @@ +/* Machine-dependent ELF dynamic relocation functions. PowerPC64 version. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <unistd.h> +#include <ldsodefs.h> +#include <_itoa.h> +#include <dl-machine.h> + +void +_dl_reloc_overflow (struct link_map *map, + const char *name, + Elf64_Addr *const reloc_addr, + const Elf64_Sym *refsym) +{ + char buffer[128]; + char *t; + t = stpcpy (buffer, name); + t = stpcpy (t, " reloc at 0x"); + _itoa_word ((unsigned long) reloc_addr, t, 16, 0); + if (refsym) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + t = stpcpy (t, " for symbol `"); + t = stpcpy (t, strtab + refsym->st_name); + t = stpcpy (t, "'"); + } + t = stpcpy (t, " out of range"); + _dl_signal_error (0, map->l_name, NULL, buffer); +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.h b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.h new file mode 100644 index 0000000000..6391b3a558 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.h @@ -0,0 +1,1036 @@ +/* Machine-dependent ELF dynamic relocation inline functions. + PowerPC64 version. + Copyright 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
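elf_irela above is the static-binary counterpart of the dynamic loader's IRELATIVE processing: the resolver it invokes receives the hwcap word, just as elf_ifunc_invoke passes GLRO(dl_hwcap). A hypothetical example (PPC_FEATURE_HAS_ALTIVEC is the same kernel hwcap bit tested in __longjmp-common.S above):

    #include <asm/cputable.h>   /* PPC_FEATURE_HAS_ALTIVEC on Linux */

    static int foo_scalar (void)  { return 0; }
    static int foo_altivec (void) { return 1; }

    /* IFUNC resolver: elf_irela (or the dynamic loader) calls this with
       the hwcap word and patches the relocation with the returned
       address.  */
    static void *
    foo_resolver (unsigned long int hwcap)
    {
      return (hwcap & PPC_FEATURE_HAS_ALTIVEC)
             ? (void *) foo_altivec : (void *) foo_scalar;
    }

    int foo (void) __attribute__ ((ifunc ("foo_resolver")));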
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#ifndef dl_machine_h +#define dl_machine_h + +#define ELF_MACHINE_NAME "powerpc64" + +#include <assert.h> +#include <sys/param.h> +#include <dl-tls.h> +#include <sysdep.h> +#include <hwcapinfo.h> + +/* Translate a processor specific dynamic tag to the index + in l_info array. */ +#define DT_PPC64(x) (DT_PPC64_##x - DT_LOPROC + DT_NUM) + +#if _CALL_ELF != 2 +/* A PowerPC64 function descriptor. The .plt (procedure linkage + table) and .opd (official procedure descriptor) sections are + arrays of these. */ +typedef struct +{ + Elf64_Addr fd_func; + Elf64_Addr fd_toc; + Elf64_Addr fd_aux; +} Elf64_FuncDesc; +#endif + +#define ELF_MULT_MACHINES_SUPPORTED + +/* Return nonzero iff ELF header is compatible with the running host. */ +static inline int +elf_machine_matches_host (const Elf64_Ehdr *ehdr) +{ + /* Verify that the binary matches our ABI version. */ + if ((ehdr->e_flags & EF_PPC64_ABI) != 0) + { +#if _CALL_ELF != 2 + if ((ehdr->e_flags & EF_PPC64_ABI) != 1) + return 0; +#else + if ((ehdr->e_flags & EF_PPC64_ABI) != 2) + return 0; +#endif + } + + return ehdr->e_machine == EM_PPC64; +} + +/* Return nonzero iff ELF header is compatible with the running host, + but not this loader. */ +static inline int +elf_host_tolerates_machine (const Elf64_Ehdr *ehdr) +{ + return ehdr->e_machine == EM_PPC; +} + +/* Return nonzero iff ELF header is compatible with the running host, + but not this loader. */ +static inline int +elf_host_tolerates_class (const Elf64_Ehdr *ehdr) +{ + return ehdr->e_ident[EI_CLASS] == ELFCLASS32; +} + + +/* Return the run-time load address of the shared object, assuming it + was originally linked at zero. */ +static inline Elf64_Addr +elf_machine_load_address (void) __attribute__ ((const)); + +static inline Elf64_Addr +elf_machine_load_address (void) +{ + Elf64_Addr ret; + + /* The first entry in .got (and thus the first entry in .toc) is the + link-time TOC_base, ie. r2. So the difference between that and + the current r2 set by the kernel is how far the shared lib has + moved. */ + asm ( " ld %0,-32768(2)\n" + " subf %0,%0,2\n" + : "=r" (ret)); + return ret; +} + +/* Return the link-time address of _DYNAMIC. */ +static inline Elf64_Addr +elf_machine_dynamic (void) +{ + Elf64_Addr runtime_dynamic; + /* It's easier to get the run-time address. */ + asm ( " addis %0,2,_DYNAMIC@toc@ha\n" + " addi %0,%0,_DYNAMIC@toc@l\n" + : "=b" (runtime_dynamic)); + /* Then subtract off the load address offset. */ + return runtime_dynamic - elf_machine_load_address() ; +} + +#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */ + +/* The PLT uses Elf64_Rela relocs. */ +#define elf_machine_relplt elf_machine_rela + + +#ifdef HAVE_INLINED_SYSCALLS +/* We do not need _dl_starting_up. 
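*/

/* A worked sketch of elf_machine_load_address above (not from the
   original source): the doubleword at r2 - 0x8000 is the first TOC
   entry and still holds the link-time value of r2, so

       load offset = current r2 - *(current r2 - 0x8000)

   which is exactly what the ld/subf pair computes.  elf_machine_dynamic
   then subtracts that offset from the run-time address of _DYNAMIC to
   recover its link-time address.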
*/ +# define DL_STARTING_UP_DEF +#else +# define DL_STARTING_UP_DEF \ +".LC__dl_starting_up:\n" \ +" .tc __GI__dl_starting_up[TC],__GI__dl_starting_up\n" +#endif + + +/* Initial entry point code for the dynamic linker. The C function + `_dl_start' is the real entry point; its return value is the user + program's entry point. */ +#define RTLD_START \ + asm (".pushsection \".text\"\n" \ +" .align 2\n" \ +" " ENTRY_2(_start) "\n" \ +BODY_PREFIX "_start:\n" \ +" " LOCALENTRY(_start) "\n" \ +/* We start with the following on the stack, from top: \ + argc (4 bytes); \ + arguments for program (terminated by NULL); \ + environment variables (terminated by NULL); \ + arguments for the program loader. */ \ +" mr 3,1\n" \ +" li 4,0\n" \ +" stdu 4,-128(1)\n" \ +/* Call _dl_start with one parameter pointing at argc. */ \ +" bl " DOT_PREFIX "_dl_start\n" \ +" nop\n" \ +/* Transfer control to _dl_start_user! */ \ +" b " DOT_PREFIX "_dl_start_user\n" \ +".LT__start:\n" \ +" .long 0\n" \ +" .byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n" \ +" .long .LT__start-" BODY_PREFIX "_start\n" \ +" .short .LT__start_name_end-.LT__start_name_start\n" \ +".LT__start_name_start:\n" \ +" .ascii \"_start\"\n" \ +".LT__start_name_end:\n" \ +" .align 2\n" \ +" " END_2(_start) "\n" \ +" .pushsection \".toc\",\"aw\"\n" \ +DL_STARTING_UP_DEF \ +".LC__rtld_local:\n" \ +" .tc _rtld_local[TC],_rtld_local\n" \ +".LC__dl_argc:\n" \ +" .tc _dl_argc[TC],_dl_argc\n" \ +".LC__dl_argv:\n" \ +" .tc __GI__dl_argv[TC],__GI__dl_argv\n" \ +".LC__dl_fini:\n" \ +" .tc _dl_fini[TC],_dl_fini\n" \ +" .popsection\n" \ +" " ENTRY_2(_dl_start_user) "\n" \ +/* Now, we do our main work of calling initialisation procedures. \ + The ELF ABI doesn't say anything about parameters for these, \ + so we just pass argc, argv, and the environment. \ + Changing these is strongly discouraged (not least because argc is \ + passed by value!). */ \ +BODY_PREFIX "_dl_start_user:\n" \ +" " LOCALENTRY(_dl_start_user) "\n" \ +/* the address of _start in r30. */ \ +" mr 30,3\n" \ +/* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28. */ \ +" ld 28,.LC__rtld_local@toc(2)\n" \ +" ld 29,.LC__dl_argc@toc(2)\n" \ +" ld 27,.LC__dl_argv@toc(2)\n" \ +/* _dl_init (_dl_loaded, _dl_argc, _dl_argv, _dl_argv+_dl_argc+1). */ \ +" ld 3,0(28)\n" \ +" lwa 4,0(29)\n" \ +" ld 5,0(27)\n" \ +" sldi 6,4,3\n" \ +" add 6,5,6\n" \ +" addi 6,6,8\n" \ +" bl " DOT_PREFIX "_dl_init\n" \ +" nop\n" \ +/* Now, to conform to the ELF ABI, we have to: \ + Pass argc (actually _dl_argc) in r3; */ \ +" lwa 3,0(29)\n" \ +/* Pass argv (actually _dl_argv) in r4; */ \ +" ld 4,0(27)\n" \ +/* Pass argv+argc+1 in r5; */ \ +" sldi 5,3,3\n" \ +" add 6,4,5\n" \ +" addi 5,6,8\n" \ +/* Pass the auxiliary vector in r6. This is passed to us just after \ + _envp. */ \ +"2: ldu 0,8(6)\n" \ +" cmpdi 0,0\n" \ +" bne 2b\n" \ +" addi 6,6,8\n" \ +/* Pass a termination function pointer (in this case _dl_fini) in \ + r7. */ \ +" ld 7,.LC__dl_fini@toc(2)\n" \ +/* Pass the stack pointer in r1 (so far so good), pointing to a NULL \ + value. This lets our startup code distinguish between a program \ + linked statically, which linux will call with argc on top of the \ + stack which will hopefully never be zero, and a dynamically linked \ + program which will always have a NULL on the top of the stack. \ + Take the opportunity to clear LR, so anyone who accidentally \ + returns from _start gets SEGV. Also clear the next few words of \ + the stack. 
*/ \ +" li 31,0\n" \ +" std 31,0(1)\n" \ +" mtlr 31\n" \ +" std 31,8(1)\n" \ +" std 31,16(1)\n" \ +" std 31,24(1)\n" \ +/* Now, call the start function descriptor at r30... */ \ +" .globl ._dl_main_dispatch\n" \ +"._dl_main_dispatch:\n" \ +" " PPC64_LOAD_FUNCPTR(30) "\n" \ +" bctr\n" \ +".LT__dl_start_user:\n" \ +" .long 0\n" \ +" .byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n" \ +" .long .LT__dl_start_user-" BODY_PREFIX "_dl_start_user\n" \ +" .short .LT__dl_start_user_name_end-.LT__dl_start_user_name_start\n" \ +".LT__dl_start_user_name_start:\n" \ +" .ascii \"_dl_start_user\"\n" \ +".LT__dl_start_user_name_end:\n" \ +" .align 2\n" \ +" " END_2(_dl_start_user) "\n" \ +" .popsection"); + +/* ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve to + one of the main executable's symbols, as for a COPY reloc. + + To make function pointer comparisons work on most targets, the + relevant ABI states that the address of a non-local function in a + dynamically linked executable is the address of the PLT entry for + that function. This is quite reasonable since using the real + function address in a non-PIC executable would typically require + dynamic relocations in .text, something to be avoided. For such + functions, the linker emits a SHN_UNDEF symbol in the executable + with value equal to the PLT entry address. Normally, SHN_UNDEF + symbols have a value of zero, so this is a clue to ld.so that it + should treat these symbols specially. For relocations not in + ELF_RTYPE_CLASS_PLT (eg. those on function pointers), ld.so should + use the value of the executable SHN_UNDEF symbol, ie. the PLT entry + address. For relocations in ELF_RTYPE_CLASS_PLT (eg. the relocs in + the PLT itself), ld.so should use the value of the corresponding + defined symbol in the object that defines the function, ie. the + real function address. This complicates ld.so in that there are + now two possible values for a given symbol, and it gets even worse + because protected symbols need yet another set of rules. + + On PowerPC64 we don't need any of this. The linker won't emit + SHN_UNDEF symbols with non-zero values. ld.so can make all + relocations behave "normally", ie. always use the real address + like PLT relocations. So always set ELF_RTYPE_CLASS_PLT. */ + +#if _CALL_ELF != 2 +#define elf_machine_type_class(type) \ + (ELF_RTYPE_CLASS_PLT | (((type) == R_PPC64_COPY) * ELF_RTYPE_CLASS_COPY)) +#else +/* And now that you have read that large comment, you can disregard it + all for ELFv2. ELFv2 does need the special SHN_UNDEF treatment. */ +#define IS_PPC64_TLS_RELOC(R) \ + (((R) >= R_PPC64_TLS && (R) <= R_PPC64_DTPREL16_HIGHESTA) \ + || ((R) >= R_PPC64_TPREL16_HIGH && (R) <= R_PPC64_DTPREL16_HIGHA)) + +#define elf_machine_type_class(type) \ + ((((type) == R_PPC64_JMP_SLOT \ + || (type) == R_PPC64_ADDR24 \ + || IS_PPC64_TLS_RELOC (type)) * ELF_RTYPE_CLASS_PLT) \ + | (((type) == R_PPC64_COPY) * ELF_RTYPE_CLASS_COPY)) +#endif + +/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ +#define ELF_MACHINE_JMP_SLOT R_PPC64_JMP_SLOT + +/* The PowerPC never uses REL relocations. */ +#define ELF_MACHINE_NO_REL 1 +#define ELF_MACHINE_NO_RELA 0 + +/* We define an initialization function to initialize HWCAP/HWCAP2 and + platform data so it can be copied into the TCB later. This is called + very early in _dl_sysdep_start for dynamically linked binaries. 
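*/

/* Worked examples for the ELFv2 elf_machine_type_class above (a
   sketch, not part of the original source):

       elf_machine_type_class (R_PPC64_JMP_SLOT)  yields ELF_RTYPE_CLASS_PLT
       elf_machine_type_class (R_PPC64_COPY)      yields ELF_RTYPE_CLASS_COPY
       elf_machine_type_class (R_PPC64_ADDR64)    yields 0

   while the ELFv1 definition classes every reloc type as
   ELF_RTYPE_CLASS_PLT, as the long comment explains.  The hwcap
   initialization hook just described follows.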
*/ +#ifdef SHARED +# define DL_PLATFORM_INIT dl_platform_init () + +static inline void __attribute__ ((unused)) +dl_platform_init (void) +{ + __tcb_parse_hwcap_and_convert_at_platform (); +} +#endif + +/* Stuff for the PLT. */ +#if _CALL_ELF != 2 +#define PLT_INITIAL_ENTRY_WORDS 3 +#define PLT_ENTRY_WORDS 3 +#define GLINK_INITIAL_ENTRY_WORDS 8 +/* The first 32k entries of glink can set an index and branch using two + instructions; past that point, glink uses three instructions. */ +#define GLINK_ENTRY_WORDS(I) (((I) < 0x8000)? 2 : 3) +#else +#define PLT_INITIAL_ENTRY_WORDS 2 +#define PLT_ENTRY_WORDS 1 +#define GLINK_INITIAL_ENTRY_WORDS 8 +#define GLINK_ENTRY_WORDS(I) 1 +#endif + +#define PPC_DCBST(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory") +#define PPC_DCBT(where) asm volatile ("dcbt 0,%0" : : "r"(where) : "memory") +#define PPC_DCBF(where) asm volatile ("dcbf 0,%0" : : "r"(where) : "memory") +#define PPC_SYNC asm volatile ("sync" : : : "memory") +#define PPC_ISYNC asm volatile ("sync; isync" : : : "memory") +#define PPC_ICBI(where) asm volatile ("icbi 0,%0" : : "r"(where) : "memory") +#define PPC_DIE asm volatile ("tweq 0,0") +/* Use this when you've modified some code, but it won't be in the + instruction fetch queue (or when it doesn't matter if it is). */ +#define MODIFIED_CODE_NOQUEUE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); } while (0) +/* Use this when it might be in the instruction queue. */ +#define MODIFIED_CODE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); PPC_ISYNC; } while (0) + +/* Set up the loaded object described by MAP so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. */ +static inline int __attribute__ ((always_inline)) +elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) +{ + if (map->l_info[DT_JMPREL]) + { + Elf64_Word i; + Elf64_Word *glink = NULL; + Elf64_Xword *plt = (Elf64_Xword *) D_PTR (map, l_info[DT_PLTGOT]); + Elf64_Word num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof (Elf64_Rela)); + Elf64_Addr l_addr = map->l_addr; + Elf64_Dyn **info = map->l_info; + char *p; + + extern void _dl_runtime_resolve (void); + extern void _dl_profile_resolve (void); + + /* Relocate the DT_PPC64_GLINK entry in the _DYNAMIC section. + elf_get_dynamic_info takes care of the standard entries but + doesn't know exactly what to do with processor specific + entries. */ + if (info[DT_PPC64(GLINK)] != NULL) + info[DT_PPC64(GLINK)]->d_un.d_ptr += l_addr; + + if (lazy) + { + Elf64_Word glink_offset; + Elf64_Word offset; + Elf64_Addr dlrr; + + dlrr = (Elf64_Addr) (profile ? _dl_profile_resolve + : _dl_runtime_resolve); + if (profile && GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), map)) + /* This is the object we are looking for. Say that we really + want profiling and the timers are started. */ + GL(dl_profile_map) = map; + +#if _CALL_ELF != 2 + /* We need to stuff the address/TOC of _dl_runtime_resolve + into doublewords 0 and 1 of plt_reserve. Then we need to + stuff the map address into doubleword 2 of plt_reserve. + This allows the GLINK0 code to transfer control to the + correct trampoline which will transfer control to fixup + in dl-machine.c. */ + { + /* The plt_reserve area is the 1st 3 doublewords of the PLT. 
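Laid out, the assignments just below produce:

	       plt_reserve[0] = fd_func of _dl_runtime_resolve
	       plt_reserve[1] = fd_toc  of _dl_runtime_resolve
	       plt_reserve[2] = this object's link_map (as fd_aux)

	     so the GLINK0 stub can load the resolver's entry point and
	     TOC, and hand the link_map on to fixup.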
*/ + Elf64_FuncDesc *plt_reserve = (Elf64_FuncDesc *) plt; + Elf64_FuncDesc *resolve_fd = (Elf64_FuncDesc *) dlrr; + plt_reserve->fd_func = resolve_fd->fd_func; + plt_reserve->fd_toc = resolve_fd->fd_toc; + plt_reserve->fd_aux = (Elf64_Addr) map; +#ifdef RTLD_BOOTSTRAP + /* When we're bootstrapping, the opd entry will not have + been relocated yet. */ + plt_reserve->fd_func += l_addr; + plt_reserve->fd_toc += l_addr; +#endif + } +#else + /* When we don't have function descriptors, the first doubleword + of the PLT holds the address of _dl_runtime_resolve, and the + second doubleword holds the map address. */ + plt[0] = dlrr; + plt[1] = (Elf64_Addr) map; +#endif + + /* Set up the lazy PLT entries. */ + glink = (Elf64_Word *) D_PTR (map, l_info[DT_PPC64(GLINK)]); + offset = PLT_INITIAL_ENTRY_WORDS; + glink_offset = GLINK_INITIAL_ENTRY_WORDS; + for (i = 0; i < num_plt_entries; i++) + { + + plt[offset] = (Elf64_Xword) &glink[glink_offset]; + offset += PLT_ENTRY_WORDS; + glink_offset += GLINK_ENTRY_WORDS (i); + } + + /* Now, we've modified data. We need to write the changes from + the data cache to a second-level unified cache, then make + sure that stale data in the instruction cache is removed. + (In a multiprocessor system, the effect is more complex.) + Most of the PLT shouldn't be in the instruction cache, but + there may be a little overlap at the start and the end. + + Assumes that dcbst and icbi apply to lines of 16 bytes or + more. Current known line sizes are 16, 32, and 128 bytes. */ + + for (p = (char *) plt; p < (char *) &plt[offset]; p += 16) + PPC_DCBST (p); + PPC_SYNC; + } + } + return lazy; +} + +#if _CALL_ELF == 2 +/* If the PLT entry whose reloc is 'reloc' resolves to a function in + the same object, return the target function's local entry point + offset if usable. */ +static inline Elf64_Addr __attribute__ ((always_inline)) +ppc64_local_entry_offset (struct link_map *map, lookup_t sym_map, + const Elf64_Rela *reloc) +{ + const Elf64_Sym *symtab; + const Elf64_Sym *sym; + + /* If the target function is in a different object, we cannot + use the local entry point. */ + if (sym_map != map) + return 0; + + /* If the linker inserted multiple TOCs, we cannot use the + local entry point. */ + if (map->l_info[DT_PPC64(OPT)] + && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_MULTI_TOC)) + return 0; + + /* Otherwise, we can use the local entry point. Retrieve its offset + from the symbol's ELF st_other field. */ + symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]); + sym = &symtab[ELFW(R_SYM) (reloc->r_info)]; + + /* If the target function is an ifunc then the local entry offset is + for the resolver, not the final destination. */ + if (__builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0)) + return 0; + + return PPC64_LOCAL_ENTRY_OFFSET (sym->st_other); +} +#endif + +/* Change the PLT entry whose reloc is 'reloc' to call the actual + routine. */ +static inline Elf64_Addr __attribute__ ((always_inline)) +elf_machine_fixup_plt (struct link_map *map, lookup_t sym_map, + const Elf64_Rela *reloc, + Elf64_Addr *reloc_addr, Elf64_Addr finaladdr) +{ +#if _CALL_ELF != 2 + Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr; + Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr; + Elf64_Addr offset = 0; + Elf64_FuncDesc zero_fd = {0, 0, 0}; + + PPC_DCBT (&plt->fd_aux); + PPC_DCBT (&plt->fd_func); + + /* If sym_map is NULL, it's a weak undefined sym; Set the plt to + zero. 
finaladdr should be zero already in this case, but guard + against invalid plt relocations with non-zero addends. */ + if (sym_map == NULL) + finaladdr = 0; + + /* Don't die here if finaladdr is zero, die if this plt entry is + actually called. Makes a difference when LD_BIND_NOW=1. + finaladdr may be zero for a weak undefined symbol, or when an + ifunc resolver returns zero. */ + if (finaladdr == 0) + rel = &zero_fd; + else + { + PPC_DCBT (&rel->fd_aux); + PPC_DCBT (&rel->fd_func); + } + + /* If the opd entry is not yet relocated (because it's from a shared + object that hasn't been processed yet), then manually reloc it. */ + if (finaladdr != 0 && map != sym_map && !sym_map->l_relocated +#if !defined RTLD_BOOTSTRAP && defined SHARED + /* Bootstrap map doesn't have l_relocated set for it. */ + && sym_map != &GL(dl_rtld_map) +#endif + ) + offset = sym_map->l_addr; + + /* For PPC64, fixup_plt copies the function descriptor from opd + over the corresponding PLT entry. + Initially, PLT Entry[i] is set up for lazy linking, or is zero. + For lazy linking, the fd_toc and fd_aux entries are irrelevant, + so for thread safety we write them before changing fd_func. */ + + plt->fd_aux = rel->fd_aux + offset; + plt->fd_toc = rel->fd_toc + offset; + PPC_DCBF (&plt->fd_toc); + PPC_ISYNC; + + plt->fd_func = rel->fd_func + offset; + PPC_DCBST (&plt->fd_func); + PPC_ISYNC; +#else + finaladdr += ppc64_local_entry_offset (map, sym_map, reloc); + *reloc_addr = finaladdr; +#endif + + return finaladdr; +} + +static inline void __attribute__ ((always_inline)) +elf_machine_plt_conflict (struct link_map *map, lookup_t sym_map, + const Elf64_Rela *reloc, + Elf64_Addr *reloc_addr, Elf64_Addr finaladdr) +{ +#if _CALL_ELF != 2 + Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr; + Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr; + Elf64_FuncDesc zero_fd = {0, 0, 0}; + + if (sym_map == NULL) + finaladdr = 0; + + if (finaladdr == 0) + rel = &zero_fd; + + plt->fd_func = rel->fd_func; + plt->fd_aux = rel->fd_aux; + plt->fd_toc = rel->fd_toc; + PPC_DCBST (&plt->fd_func); + PPC_DCBST (&plt->fd_aux); + PPC_DCBST (&plt->fd_toc); + PPC_SYNC; +#else + finaladdr += ppc64_local_entry_offset (map, sym_map, reloc); + *reloc_addr = finaladdr; +#endif +} + +/* Return the final value of a plt relocation. */ +static inline Elf64_Addr +elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + Elf64_Addr value) +{ + return value + reloc->r_addend; +} + + +/* Names of the architecture-specific auditing callback functions. 
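*/

/* In brief, the ELFv1 store ordering above is (a sketch restating the
   code, not additional source):

       plt->fd_aux  = rel->fd_aux + offset;
       plt->fd_toc  = rel->fd_toc + offset;
       PPC_DCBF (&plt->fd_toc);  PPC_ISYNC;
       plt->fd_func = rel->fd_func + offset;
       PPC_DCBST (&plt->fd_func);  PPC_ISYNC;

   so a concurrent caller that observes the new fd_func is guaranteed
   to observe the matching fd_toc and fd_aux as well.  */

/* For ELFv1 and ELFv2 respectively: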
*/ +#if _CALL_ELF != 2 +#define ARCH_LA_PLTENTER ppc64_gnu_pltenter +#define ARCH_LA_PLTEXIT ppc64_gnu_pltexit +#else +#define ARCH_LA_PLTENTER ppc64v2_gnu_pltenter +#define ARCH_LA_PLTEXIT ppc64v2_gnu_pltexit +#endif + +#endif /* dl_machine_h */ + +#ifdef RESOLVE_MAP + +#define PPC_LO(v) ((v) & 0xffff) +#define PPC_HI(v) (((v) >> 16) & 0xffff) +#define PPC_HA(v) PPC_HI ((v) + 0x8000) +#define PPC_HIGHER(v) (((v) >> 32) & 0xffff) +#define PPC_HIGHERA(v) PPC_HIGHER ((v) + 0x8000) +#define PPC_HIGHEST(v) (((v) >> 48) & 0xffff) +#define PPC_HIGHESTA(v) PPC_HIGHEST ((v) + 0x8000) +#define BIT_INSERT(var, val, mask) \ + ((var) = ((var) & ~(Elf64_Addr) (mask)) | ((val) & (mask))) + +#define dont_expect(X) __builtin_expect ((X), 0) + +extern void _dl_reloc_overflow (struct link_map *map, + const char *name, + Elf64_Addr *const reloc_addr, + const Elf64_Sym *refsym) + attribute_hidden; + +auto inline void __attribute__ ((always_inline)) +elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +{ + Elf64_Addr *const reloc_addr = reloc_addr_arg; + *reloc_addr = l_addr + reloc->r_addend; +} + +/* This computes the value used by TPREL* relocs. */ +auto inline Elf64_Addr __attribute__ ((always_inline, const)) +elf_machine_tprel (struct link_map *map, + struct link_map *sym_map, + const Elf64_Sym *sym, + const Elf64_Rela *reloc) +{ +#ifndef RTLD_BOOTSTRAP + if (sym_map) + { + CHECK_STATIC_TLS (map, sym_map); +#endif + return TLS_TPREL_VALUE (sym_map, sym, reloc); +#ifndef RTLD_BOOTSTRAP + } +#endif + return 0; +} + +/* Call function at address VALUE (an OPD entry) to resolve ifunc relocs. */ +auto inline Elf64_Addr __attribute__ ((always_inline)) +resolve_ifunc (Elf64_Addr value, + const struct link_map *map, const struct link_map *sym_map) +{ +#if _CALL_ELF != 2 +#ifndef RESOLVE_CONFLICT_FIND_MAP + /* The function we are calling may not yet have its opd entry relocated. */ + Elf64_FuncDesc opd; + if (map != sym_map +# if !defined RTLD_BOOTSTRAP && defined SHARED + /* Bootstrap map doesn't have l_relocated set for it. */ + && sym_map != &GL(dl_rtld_map) +# endif + && !sym_map->l_relocated) + { + Elf64_FuncDesc *func = (Elf64_FuncDesc *) value; + opd.fd_func = func->fd_func + sym_map->l_addr; + opd.fd_toc = func->fd_toc + sym_map->l_addr; + opd.fd_aux = func->fd_aux; + /* GCC 4.9+ eliminates the branch as dead code, force the odp set + dependency. */ + asm ("" : "=r" (value) : "0" (&opd), "X" (opd)); + } +#endif +#endif + return ((Elf64_Addr (*) (unsigned long int)) value) (GLRO(dl_hwcap)); +} + +/* Perform the relocation specified by RELOC and SYM (which is fully + resolved). MAP is the object containing the reloc. */ +auto inline void __attribute__ ((always_inline)) +elf_machine_rela (struct link_map *map, + const Elf64_Rela *reloc, + const Elf64_Sym *sym, + const struct r_found_version *version, + void *const reloc_addr_arg, + int skip_ifunc) +{ + Elf64_Addr *const reloc_addr = reloc_addr_arg; + const int r_type = ELF64_R_TYPE (reloc->r_info); + const Elf64_Sym *const refsym = sym; + union unaligned + { + uint16_t u2; + uint32_t u4; + uint64_t u8; + } __attribute__ ((__packed__)); + + if (r_type == R_PPC64_RELATIVE) + { + *reloc_addr = map->l_addr + reloc->r_addend; + return; + } + + if (__glibc_unlikely (r_type == R_PPC64_NONE)) + return; + + /* We need SYM_MAP even in the absence of TLS, for elf_machine_fixup_plt + and STT_GNU_IFUNC. */ + struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); + Elf64_Addr value = ((sym_map == NULL ? 
0 : sym_map->l_addr + sym->st_value) + + reloc->r_addend); + + if (sym != NULL + && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0) + && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1) + && __builtin_expect (!skip_ifunc, 1)) + value = resolve_ifunc (value, map, sym_map); + + /* For relocs that don't edit code, return. + For relocs that might edit instructions, break from the switch. */ + switch (r_type) + { + case R_PPC64_ADDR64: + case R_PPC64_GLOB_DAT: + *reloc_addr = value; + return; + + case R_PPC64_IRELATIVE: + if (__glibc_likely (!skip_ifunc)) + value = resolve_ifunc (value, map, sym_map); + *reloc_addr = value; + return; + + case R_PPC64_JMP_IREL: + if (__glibc_likely (!skip_ifunc)) + value = resolve_ifunc (value, map, sym_map); + /* Fall thru */ + case R_PPC64_JMP_SLOT: +#ifdef RESOLVE_CONFLICT_FIND_MAP + elf_machine_plt_conflict (map, sym_map, reloc, reloc_addr, value); +#else + elf_machine_fixup_plt (map, sym_map, reloc, reloc_addr, value); +#endif + return; + + case R_PPC64_DTPMOD64: + if (map->l_info[DT_PPC64(OPT)] + && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_TLS)) + { +#ifdef RTLD_BOOTSTRAP + reloc_addr[0] = 0; + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + return; +#else + if (sym_map != NULL) + { +# ifndef SHARED + CHECK_STATIC_TLS (map, sym_map); +# else + if (TRY_STATIC_TLS (map, sym_map)) +# endif + { + reloc_addr[0] = 0; + /* Set up for local dynamic. */ + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + return; + } + } +#endif + } +#ifdef RTLD_BOOTSTRAP + /* During startup the dynamic linker is always index 1. */ + *reloc_addr = 1; +#else + /* Get the information from the link map returned by the + resolve function. */ + if (sym_map != NULL) + *reloc_addr = sym_map->l_tls_modid; +#endif + return; + + case R_PPC64_DTPREL64: + if (map->l_info[DT_PPC64(OPT)] + && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_TLS)) + { +#ifdef RTLD_BOOTSTRAP + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + return; +#else + if (sym_map != NULL) + { + /* This reloc is always preceded by R_PPC64_DTPMOD64. */ +# ifndef SHARED + assert (HAVE_STATIC_TLS (map, sym_map)); +# else + if (HAVE_STATIC_TLS (map, sym_map)) +# endif + { + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + return; + } + } +#endif + } + /* During relocation all TLS symbols are defined and used. + Therefore the offset is already correct. 
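As a sketch, with the powerpc dl-tls.h definitions (TP_OFFSET is
     0x7000, DTP_OFFSET is 0x8000):

	 TPREL value  = sym_map->l_tls_offset + st_value + r_addend
			- TP_OFFSET
	 DTPREL value = st_value + r_addend - DTP_OFFSET

     the biases being the ones the ABI applies to thread-pointer- and
     DTV-relative accesses.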
*/ +#ifndef RTLD_BOOTSTRAP + if (sym_map != NULL) + *reloc_addr = TLS_DTPREL_VALUE (sym, reloc); +#endif + return; + + case R_PPC64_TPREL64: + *reloc_addr = elf_machine_tprel (map, sym_map, sym, reloc); + return; + + case R_PPC64_TPREL16_LO_DS: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect ((value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16_LO_DS", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Half *) reloc_addr, value, 0xfffc); + break; + + case R_PPC64_TPREL16_DS: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16_DS", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Half *) reloc_addr, value, 0xfffc); + break; + + case R_PPC64_TPREL16: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect ((value + 0x8000) >= 0x10000)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16", reloc_addr, refsym); + *(Elf64_Half *) reloc_addr = PPC_LO (value); + break; + + case R_PPC64_TPREL16_LO: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_LO (value); + break; + + case R_PPC64_TPREL16_HI: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect (value + 0x80000000 >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16_HI", reloc_addr, refsym); + *(Elf64_Half *) reloc_addr = PPC_HI (value); + break; + + case R_PPC64_TPREL16_HIGH: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HI (value); + break; + + case R_PPC64_TPREL16_HA: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect (value + 0x80008000 >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16_HA", reloc_addr, refsym); + *(Elf64_Half *) reloc_addr = PPC_HA (value); + break; + + case R_PPC64_TPREL16_HIGHA: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HA (value); + break; + + case R_PPC64_TPREL16_HIGHER: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HIGHER (value); + break; + + case R_PPC64_TPREL16_HIGHEST: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HIGHEST (value); + break; + + case R_PPC64_TPREL16_HIGHERA: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HIGHERA (value); + break; + + case R_PPC64_TPREL16_HIGHESTA: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HIGHESTA (value); + break; + +#ifndef RTLD_BOOTSTRAP /* None of the following appear in ld.so */ + case R_PPC64_ADDR16_LO_DS: + if (dont_expect ((value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_LO_DS", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Half *) reloc_addr, value, 0xfffc); + break; + + case R_PPC64_ADDR16_LO: + *(Elf64_Half *) reloc_addr = PPC_LO (value); + break; + + case R_PPC64_ADDR16_HI: + if (dont_expect (value + 0x80000000 >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_HI", reloc_addr, refsym); + case R_PPC64_ADDR16_HIGH: + *(Elf64_Half *) reloc_addr = PPC_HI (value); + break; + + case R_PPC64_ADDR16_HA: + if (dont_expect (value + 0x80008000 >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_HA", reloc_addr, refsym); + case R_PPC64_ADDR16_HIGHA: + *(Elf64_Half *) reloc_addr = PPC_HA (value); + break; + + case R_PPC64_ADDR30: + { + Elf64_Addr delta = value - (Elf64_Xword) reloc_addr; + if (dont_expect ((delta + 0x80000000) 
>= 0x100000000LL + || (delta & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR30", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Word *) reloc_addr, delta, 0xfffffffc); + } + break; + + case R_PPC64_COPY: + if (dont_expect (sym == NULL)) + /* This can happen in trace mode when an object could not be found. */ + return; + if (dont_expect (sym->st_size > refsym->st_size + || (GLRO(dl_verbose) + && sym->st_size < refsym->st_size))) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + _dl_error_printf ("%s: Symbol `%s' has different size" \ + " in shared object," \ + " consider re-linking\n", + RTLD_PROGNAME, strtab + refsym->st_name); + } + memcpy (reloc_addr_arg, (char *) value, + MIN (sym->st_size, refsym->st_size)); + return; + + case R_PPC64_UADDR64: + ((union unaligned *) reloc_addr)->u8 = value; + return; + + case R_PPC64_UADDR32: + ((union unaligned *) reloc_addr)->u4 = value; + return; + + case R_PPC64_ADDR32: + if (dont_expect ((value + 0x80000000) >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_ADDR32", reloc_addr, refsym); + *(Elf64_Word *) reloc_addr = value; + return; + + case R_PPC64_ADDR24: + if (dont_expect ((value + 0x2000000) >= 0x4000000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR24", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Word *) reloc_addr, value, 0x3fffffc); + break; + + case R_PPC64_ADDR16: + if (dont_expect ((value + 0x8000) >= 0x10000)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16", reloc_addr, refsym); + *(Elf64_Half *) reloc_addr = value; + break; + + case R_PPC64_UADDR16: + if (dont_expect ((value + 0x8000) >= 0x10000)) + _dl_reloc_overflow (map, "R_PPC64_UADDR16", reloc_addr, refsym); + ((union unaligned *) reloc_addr)->u2 = value; + return; + + case R_PPC64_ADDR16_DS: + if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_DS", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Half *) reloc_addr, value, 0xfffc); + break; + + case R_PPC64_ADDR16_HIGHER: + *(Elf64_Half *) reloc_addr = PPC_HIGHER (value); + break; + + case R_PPC64_ADDR16_HIGHEST: + *(Elf64_Half *) reloc_addr = PPC_HIGHEST (value); + break; + + case R_PPC64_ADDR16_HIGHERA: + *(Elf64_Half *) reloc_addr = PPC_HIGHERA (value); + break; + + case R_PPC64_ADDR16_HIGHESTA: + *(Elf64_Half *) reloc_addr = PPC_HIGHESTA (value); + break; + + case R_PPC64_ADDR14: + case R_PPC64_ADDR14_BRTAKEN: + case R_PPC64_ADDR14_BRNTAKEN: + { + if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR14", reloc_addr, refsym); + Elf64_Word insn = *(Elf64_Word *) reloc_addr; + BIT_INSERT (insn, value, 0xfffc); + if (r_type != R_PPC64_ADDR14) + { + insn &= ~(1 << 21); + if (r_type == R_PPC64_ADDR14_BRTAKEN) + insn |= 1 << 21; + if ((insn & (0x14 << 21)) == (0x04 << 21)) + insn |= 0x02 << 21; + else if ((insn & (0x14 << 21)) == (0x10 << 21)) + insn |= 0x08 << 21; + } + *(Elf64_Word *) reloc_addr = insn; + } + break; + + case R_PPC64_REL32: + *(Elf64_Word *) reloc_addr = value - (Elf64_Addr) reloc_addr; + return; + + case R_PPC64_REL64: + *reloc_addr = value - (Elf64_Addr) reloc_addr; + return; +#endif /* !RTLD_BOOTSTRAP */ + + default: + _dl_reloc_bad_type (map, r_type, 0); + return; + } + MODIFIED_CODE_NOQUEUE (reloc_addr); +} + +auto inline void __attribute__ ((always_inline)) +elf_machine_lazy_rel (struct link_map *map, + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) +{ + /* elf_machine_runtime_setup handles this. 
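Every PLT entry was already pointed at its glink stub there, so no
     per-reloc work is needed; the first call through an entry resolves
     itself, roughly:

	 call -> glink stub (loads the index) -> glink0/PLT0 code
	      -> _dl_runtime_resolve (link_map, index * 24) -> fixup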
*/ +} + + +#endif /* RESOLVE */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-trampoline.S b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-trampoline.S new file mode 100644 index 0000000000..5ec729d1f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-trampoline.S @@ -0,0 +1,500 @@ +/* PLT trampolines. PPC64 version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <rtld-global-offsets.h> + + + .section ".text" +/* On entry r0 contains the index of the PLT entry we need to fixup + and r11 contains the link_map (from PLT0+16). The link_map becomes + parm1 (r3) and the index (r0) need to be converted to an offset + (index * 24) in parm2 (r4). */ + +#define FRAME_SIZE (FRAME_MIN_SIZE+64) +/* We need to save the registers used to pass parameters, ie. r3 thru + r10; Use local var space rather than the parameter save area, + because gcc as of 2010/05 doesn't allocate a proper stack frame for + a function that makes no calls except for __tls_get_addr and we + might be here resolving the __tls_get_addr call. */ +#define INT_PARMS FRAME_MIN_SIZE +EALIGN(_dl_runtime_resolve, 4, 0) + stdu r1,-FRAME_SIZE(r1) + cfi_adjust_cfa_offset (FRAME_SIZE) + std r3,INT_PARMS+0(r1) + mr r3,r11 + std r4,INT_PARMS+8(r1) + sldi r4,r0,1 + std r5,INT_PARMS+16(r1) + add r4,r4,r0 + std r6,INT_PARMS+24(r1) + sldi r4,r4,3 + std r7,INT_PARMS+32(r1) + mflr r0 + std r8,INT_PARMS+40(r1) +/* Store the LR in the LR Save area. */ + std r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + cfi_offset (lr, FRAME_LR_SAVE) + std r9,INT_PARMS+48(r1) + std r10,INT_PARMS+56(r1) + bl JUMPTARGET(_dl_fixup) +#ifndef SHARED + nop +#endif +/* Put the registers back. */ + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +/* Prepare for calling the function returned by fixup. */ + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +#if _CALL_ELF == 2 +/* Restore the caller's TOC in case we jump to a local entry point. */ + ld r2,FRAME_SIZE+FRAME_TOC_SAVE(r1) +#endif +/* Unwind the stack frame, and jump. */ + addi r1,r1,FRAME_SIZE + bctr +END(_dl_runtime_resolve) +#undef FRAME_SIZE +#undef INT_PARMS + + /* Stack layout: ELFv2 ABI. 
+ +752 previous backchain + +744 spill_r31 + +736 spill_r30 + +720 v8 + +704 v7 + +688 v6 + +672 v5 + +656 v4 + +640 v3 + +624 v2 + +608 v1 + +600 fp10 + ELFv1 ABI +592 fp9 + +592 previous backchain +584 fp8 + +584 spill_r31 +576 fp7 + +576 spill_r30 +568 fp6 + +560 v1 +560 fp5 + +552 fp4 +552 fp4 + +544 fp3 +544 fp3 + +536 fp2 +536 fp2 + +528 fp1 +528 fp1 + +520 r4 +520 r4 + +512 r3 +512 r3 + return values + +504 free + +496 stackframe + +488 lr + +480 r1 + +464 v13 + +448 v12 + +432 v11 + +416 v10 + +400 v9 + +384 v8 + +368 v7 + +352 v6 + +336 v5 + +320 v4 + +304 v3 + +288 v2 + * VMX Parms in V2-V13, V0-V1 are scratch + +284 vrsave + +280 free + +272 fp13 + +264 fp12 + +256 fp11 + +248 fp10 + +240 fp9 + +232 fp8 + +224 fp7 + +216 fp6 + +208 fp5 + +200 fp4 + +192 fp3 + +184 fp2 + +176 fp1 + * FP Parms in FP1-FP13, FP0 is a scratch register + +168 r10 + +160 r9 + +152 r8 + +144 r7 + +136 r6 + +128 r5 + +120 r4 + +112 r3 + * Integer parms in R3-R10, R0 is scratch, R1 SP, R2 is TOC + +104 parm8 + +96 parm7 + +88 parm6 + +80 parm5 + +72 parm4 + +64 parm3 + +56 parm2 + +48 parm1 + * Parameter save area + * (v1 ABI: Allocated by the call, at least 8 double words) + +40 v1 ABI: TOC save area + +32 v1 ABI: Reserved for linker + +24 v1 ABI: Reserved for compiler / v2 ABI: TOC save area + +16 LR save area + +8 CR save area + r1+0 stack back chain + */ +#if _CALL_ELF == 2 +# define FRAME_SIZE 752 +# define VR_RTN 608 +#else +# define FRAME_SIZE 592 +# define VR_RTN 560 +#endif +#define INT_RTN 512 +#define FPR_RTN 528 +#define STACK_FRAME 496 +#define CALLING_LR 488 +#define CALLING_SP 480 +#define INT_PARMS 112 +#define FPR_PARMS 176 +#define VR_PARMS 288 +#define VR_VRSAVE 284 + .section ".toc","aw" +.LC__dl_hwcap: +# ifdef SHARED + .tc _rtld_local_ro[TC],_rtld_local_ro +# else + .tc _dl_hwcap[TC],_dl_hwcap +# endif + .section ".text" + + .machine "altivec" +/* On entry r0 contains the index of the PLT entry we need to fixup + and r11 contains the link_map (from PLT0+16). The link_map becomes + parm1 (r3) and the index (r0) needs to be converted to an offset + (index * 24) in parm2 (r4). */ +#ifndef PROF +EALIGN(_dl_profile_resolve, 4, 0) +/* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we + need to call _dl_call_pltexit. */ + std r31,-8(r1) + std r30,-16(r1) +/* We need to save the registers used to pass parameters, ie. r3 thru + r10; the registers are saved in a stack frame. */ + stdu r1,-FRAME_SIZE(r1) + cfi_adjust_cfa_offset (FRAME_SIZE) + cfi_offset(r31,-8) + cfi_offset(r30,-16) + std r3,INT_PARMS+0(r1) + mr r3,r11 + std r4,INT_PARMS+8(r1) + sldi r4,r0,1 /* index * 2 */ + std r5,INT_PARMS+16(r1) + add r4,r4,r0 /* index * 3 */ + std r6,INT_PARMS+24(r1) + sldi r4,r4,3 /* index * 24 == PLT offset */ + mflr r5 + std r7,INT_PARMS+32(r1) + std r8,INT_PARMS+40(r1) +/* Store the LR in the LR Save area. */ + la r8,FRAME_SIZE(r1) + std r5,FRAME_SIZE+FRAME_LR_SAVE(r1) + cfi_offset (lr, FRAME_LR_SAVE) + std r5,CALLING_LR(r1) + std r9,INT_PARMS+48(r1) + std r10,INT_PARMS+56(r1) + std r8,CALLING_SP(r1) + ld r12,.LC__dl_hwcap@toc(r2) +#ifdef SHARED + /* Load _rtld_local_ro._dl_hwcap. */ + ld r12,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r12) +#else + ld r12,0(r12) /* Load extern _dl_hwcap. */ +#endif + andis. 
r0,r12,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(saveFP) + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) + li r11,32 + li r12,64 + stvx v2,0,r10 + stvx v3,0,r9 + + stvx v4,r11,r10 + stvx v5,r11,r9 + addi r11,r11,64 + + stvx v6,r12,r10 + stvx v7,r12,r9 + addi r12,r12,64 + + stvx v8,r11,r10 + stvx v9,r11,r9 + addi r11,r11,64 + + stvx v10,r12,r10 + stvx v11,r12,r9 + mfspr r0,VRSAVE + + stvx v12,r11,r10 + stvx v13,r11,r9 +L(saveFP): + stw r0,VR_VRSAVE(r1) +/* Save floating registers. */ + stfd fp1,FPR_PARMS+0(r1) + stfd fp2,FPR_PARMS+8(r1) + stfd fp3,FPR_PARMS+16(r1) + stfd fp4,FPR_PARMS+24(r1) + stfd fp5,FPR_PARMS+32(r1) + stfd fp6,FPR_PARMS+40(r1) + stfd fp7,FPR_PARMS+48(r1) + stfd fp8,FPR_PARMS+56(r1) + stfd fp9,FPR_PARMS+64(r1) + stfd fp10,FPR_PARMS+72(r1) + stfd fp11,FPR_PARMS+80(r1) + li r0,-1 + stfd fp12,FPR_PARMS+88(r1) + stfd fp13,FPR_PARMS+96(r1) +/* Load the extra parameters. */ + addi r6,r1,INT_PARMS + addi r7,r1,STACK_FRAME +/* Save link_map* and reloc_addr parms for later. */ + mr r31,r3 + mr r30,r4 + std r0,0(r7) + bl JUMPTARGET(_dl_profile_fixup) +#ifndef SHARED + nop +#endif +/* Test *framesizep > 0 to see if need to do pltexit processing. */ + ld r0,STACK_FRAME(r1) +/* Put the registers back. */ + lwz r12,VR_VRSAVE(r1) + cmpdi cr1,r0,0 + cmpdi cr0,r12,0 + bgt cr1,L(do_pltexit) + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) +/* VRSAVE must be non-zero if VMX is present and VRs are in use. */ + beq L(restoreFXR) + li r11,32 + li r12,64 + lvx v2,0,r10 + lvx v3,0,r9 + + lvx v4,r11,r10 + lvx v5,r11,r9 + addi r11,r11,64 + + lvx v6,r12,r10 + lvx v7,r12,r9 + addi r12,r12,64 + + lvx v8,r11,r10 + lvx v9,r11,r9 + addi r11,r11,64 + + lvx v10,r12,r10 + lvx v11,r12,r9 + + lvx v12,r11,r10 + lvx v13,r11,r9 +L(restoreFXR): + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +/* Prepare for calling the function returned by fixup. */ + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +#if _CALL_ELF == 2 +/* Restore the caller's TOC in case we jump to a local entry point. */ + ld r2,FRAME_SIZE+FRAME_TOC_SAVE(r1) +#endif +/* Load the floating point registers. */ + lfd fp1,FPR_PARMS+0(r1) + lfd fp2,FPR_PARMS+8(r1) + lfd fp3,FPR_PARMS+16(r1) + lfd fp4,FPR_PARMS+24(r1) + lfd fp5,FPR_PARMS+32(r1) + lfd fp6,FPR_PARMS+40(r1) + lfd fp7,FPR_PARMS+48(r1) + lfd fp8,FPR_PARMS+56(r1) + lfd fp9,FPR_PARMS+64(r1) + lfd fp10,FPR_PARMS+72(r1) + lfd fp11,FPR_PARMS+80(r1) + lfd fp12,FPR_PARMS+88(r1) + lfd fp13,FPR_PARMS+96(r1) +/* Unwind the stack frame, and jump. */ + ld r31,FRAME_SIZE-8(r1) + ld r30,FRAME_SIZE-16(r1) + addi r1,r1,FRAME_SIZE + bctr + +L(do_pltexit): + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) + beq L(restoreFXR2) + li r11,32 + li r12,64 + lvx v2,0,r10 + lvx v3,0,r9 + + lvx v4,r11,r10 + lvx v5,r11,r9 + addi r11,r11,64 + + lvx v6,r12,r10 + lvx v7,r12,r9 + addi r12,r12,64 + + lvx v8,r11,r10 + lvx v9,r11,r9 + addi r11,r11,64 + + lvx v10,r12,r10 + lvx v11,r12,r9 + + lvx v12,r11,r10 + lvx v13,r11,r9 +L(restoreFXR2): + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +/* Prepare for calling the function returned by fixup. */ + std r2,FRAME_TOC_SAVE(r1) + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +/* Load the floating point registers. 
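(At the C level this slow path behaves roughly like

       long int framesize = -1;
       addr = _dl_profile_fixup (link_map, reloc_offset, saved_lr,
				 regs, &framesize);
       ... framesize > 0, so call the target via bctrl, then pass the
       saved argument and return registers to _dl_call_pltexit ...

   where _dl_profile_fixup and _dl_call_pltexit are the real entry
   points in elf/dl-runtime.c and the other names are illustrative.)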
*/ + lfd fp1,FPR_PARMS+0(r1) + lfd fp2,FPR_PARMS+8(r1) + lfd fp3,FPR_PARMS+16(r1) + lfd fp4,FPR_PARMS+24(r1) + lfd fp5,FPR_PARMS+32(r1) + lfd fp6,FPR_PARMS+40(r1) + lfd fp7,FPR_PARMS+48(r1) + lfd fp8,FPR_PARMS+56(r1) + lfd fp9,FPR_PARMS+64(r1) + lfd fp10,FPR_PARMS+72(r1) + lfd fp11,FPR_PARMS+80(r1) + lfd fp12,FPR_PARMS+88(r1) + lfd fp13,FPR_PARMS+96(r1) +/* Call the target function. */ + bctrl + ld r2,FRAME_TOC_SAVE(r1) + lwz r12,VR_VRSAVE(r1) +/* But return here and store the return values. */ + std r3,INT_RTN(r1) + std r4,INT_RTN+8(r1) + stfd fp1,FPR_RTN+0(r1) + stfd fp2,FPR_RTN+8(r1) + cmpdi cr0,r12,0 + la r10,VR_RTN(r1) + stfd fp3,FPR_RTN+16(r1) + stfd fp4,FPR_RTN+24(r1) +#if _CALL_ELF == 2 + la r12,VR_RTN+16(r1) + stfd fp5,FPR_RTN+32(r1) + stfd fp6,FPR_RTN+40(r1) + li r5,32 + li r6,64 + stfd fp7,FPR_RTN+48(r1) + stfd fp8,FPR_RTN+56(r1) + stfd fp9,FPR_RTN+64(r1) + stfd fp10,FPR_RTN+72(r1) +#endif + mr r3,r31 + mr r4,r30 + beq L(callpltexit) + stvx v2,0,r10 +#if _CALL_ELF == 2 + stvx v3,0,r12 + stvx v4,r5,r10 + stvx v5,r5,r12 + addi r5,r5,64 + stvx v6,r6,r10 + stvx v7,r6,r12 + stvx v8,r5,r10 + stvx v9,r5,r12 +#endif +L(callpltexit): + addi r5,r1,INT_PARMS + addi r6,r1,INT_RTN + bl JUMPTARGET(_dl_call_pltexit) +#ifndef SHARED + nop +#endif +/* Restore the return values from target function. */ + lwz r12,VR_VRSAVE(r1) + ld r3,INT_RTN(r1) + ld r4,INT_RTN+8(r1) + lfd fp1,FPR_RTN+0(r1) + lfd fp2,FPR_RTN+8(r1) + cmpdi cr0,r12,0 + la r11,VR_RTN(r1) + lfd fp3,FPR_RTN+16(r1) + lfd fp4,FPR_RTN+24(r1) +#if _CALL_ELF == 2 + la r12,VR_RTN+16(r1) + lfd fp5,FPR_RTN+32(r1) + lfd fp6,FPR_RTN+40(r1) + li r30,32 + li r31,64 + lfd fp7,FPR_RTN+48(r1) + lfd fp8,FPR_RTN+56(r1) + lfd fp9,FPR_RTN+64(r1) + lfd fp10,FPR_RTN+72(r1) +#endif + beq L(pltexitreturn) + lvx v2,0,r11 +#if _CALL_ELF == 2 + lvx v3,0,r12 + lvx v4,r30,r11 + lvx v5,r30,r12 + addi r30,r30,64 + lvx v6,r31,r11 + lvx v7,r31,r12 + lvx v8,r30,r11 + lvx v9,r30,r12 +#endif +L(pltexitreturn): + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r31,FRAME_SIZE-8(r1) + ld r30,FRAME_SIZE-16(r1) + mtlr r0 + ld r1,0(r1) + blr +END(_dl_profile_resolve) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/entry.h b/REORG.TODO/sysdeps/powerpc/powerpc64/entry.h new file mode 100644 index 0000000000..9131d9ceb6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/entry.h @@ -0,0 +1,37 @@ +/* Finding the entry point and start of text. PowerPC64 version. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + + +#ifndef __ASSEMBLY__ +extern void _start (void); +#endif + +#define ENTRY_POINT _start + +#if _CALL_ELF != 2 +/* We have to provide a special declaration. */ +#define ENTRY_POINT_DECL(class) class void _start (void); + +/* Use the address of ._start as the lowest address for which we need + to keep profiling records. 
We can't copy the ia64 scheme as our
+   entry point address is really the address of the function
+   descriptor, not the actual function entry.  */
+#define TEXT_START \
+  ({ extern unsigned long int _start_as_data[] asm ("_start");	\
+     _start_as_data[0]; })
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/ffsll.c b/REORG.TODO/sysdeps/powerpc/powerpc64/ffsll.c
new file mode 100644
index 0000000000..ae18f127a0
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/ffsll.c
@@ -0,0 +1,37 @@
+/* Find first set bit in a word, counted from least significant end.
+   For PowerPC.
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Torbjorn Granlund (tege@sics.se).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define ffsl __something_else
+#include <limits.h>
+#include <string.h>
+
+#undef ffs
+
+int
+__ffsll (long long int x)
+{
+  int cnt;
+
+  asm ("cntlzd %0,%1" : "=r" (cnt) : "r" (x & -x));
+  return 64 - cnt;
+}
+weak_alias (__ffsll, ffsll)
+#undef ffsl
+weak_alias (__ffsll, ffsl)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
new file mode 100644
index 0000000000..317a988854
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
@@ -0,0 +1,44 @@
+ifeq ($(subdir),math)
+sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \
+		   s_isnan-power5 s_isnan-ppc64 s_copysign-power6 \
+		   s_copysign-ppc64 s_finite-power7 s_finite-ppc64 \
+		   s_finitef-ppc64 s_isinff-ppc64 s_isinf-power7 \
+		   s_isinf-ppc64 s_modf-power5+ s_modf-ppc64 \
+		   s_modff-power5+ s_modff-ppc64 s_isnan-power8 \
+		   s_isinf-power8 s_finite-power8
+
+libm-sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \
+			s_isnan-power5 s_isnan-ppc64 s_llround-power6x \
+			s_llround-power5+ s_llround-ppc64 s_ceil-power5+ \
+			s_ceil-ppc64 s_ceilf-power5+ s_ceilf-ppc64 \
+			s_floor-power5+ s_floor-ppc64 s_floorf-power5+ \
+			s_floorf-ppc64 s_round-power5+ s_round-ppc64 \
+			s_roundf-power5+ s_roundf-ppc64 s_trunc-power5+ \
+			s_trunc-ppc64 s_truncf-power5+ s_truncf-ppc64 \
+			s_copysign-power6 s_copysign-ppc64 s_llrint-power6x \
+			s_llrint-ppc64 s_finite-power7 s_finite-ppc64 \
+			s_finitef-ppc64 s_isinff-ppc64 s_isinf-power7 \
+			s_isinf-ppc64 s_logb-power7 s_logbf-power7 \
+			s_logbl-power7 s_logb-ppc64 s_logbf-ppc64 \
+			s_logbl-ppc64 s_modf-power5+ s_modf-ppc64 \
+			s_modff-power5+ s_modff-ppc64 e_hypot-ppc64 \
+			e_hypot-power7 e_hypotf-ppc64 e_hypotf-power7 \
+			s_isnan-power8 s_isinf-power8 s_finite-power8 \
+			s_llrint-power8 s_llround-power8 \
+			e_expf-power8 e_expf-ppc64 \
+			s_sinf-ppc64 s_sinf-power8 \
+			s_cosf-ppc64 s_cosf-power8
+
+CFLAGS-s_logbf-power7.c = -mcpu=power7
+CFLAGS-s_logbl-power7.c = -mcpu=power7
+CFLAGS-s_logb-power7.c = -mcpu=power7
+CFLAGS-s_modf-power5+.c = -mcpu=power5+
+CFLAGS-s_modff-power5+.c = -mcpu=power5+
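# Each *-powerN file above is compiled with the matching -mcpu so GCC
# may use that ISA's instructions; the ifunc dispatchers only select
# those objects when the hardware actually reports the feature.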
+CFLAGS-e_hypot-power7.c = -mcpu=power7 +CFLAGS-e_hypotf-power7.c = -mcpu=power7 + +# These files quiet sNaNs in a way that is optimized away without +# -fsignaling-nans. +CFLAGS-s_modf-ppc64.c += -fsignaling-nans +CFLAGS-s_modff-ppc64.c += -fsignaling-nans +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S new file mode 100644 index 0000000000..1e6cc51d9e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S @@ -0,0 +1,26 @@ +/* __ieee754_expf() POWER8 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_expf __ieee754_expf_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c new file mode 100644 index 0000000000..b236290ea2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c @@ -0,0 +1,24 @@ +/* __ieee_expf() PowerPC64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_expf __ieee754_expf_ppc64 + +#include <sysdeps/ieee754/flt-32/e_expf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c new file mode 100644 index 0000000000..577093675c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c @@ -0,0 +1,31 @@ +/* Multiple versions of ieee754_expf. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include "init-arch.h" + +extern __typeof (__ieee754_expf) __ieee754_expf_ppc64 attribute_hidden; +extern __typeof (__ieee754_expf) __ieee754_expf_power8 attribute_hidden; + +libc_ifunc (__ieee754_expf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __ieee754_expf_power8 + : __ieee754_expf_ppc64); + +strong_alias (__ieee754_expf, __expf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c new file mode 100644 index 0000000000..dbe9b33e2e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c @@ -0,0 +1,19 @@ +/* __ieee_hypot() POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c new file mode 100644 index 0000000000..baebb36ae3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c @@ -0,0 +1,26 @@ +/* __ieee_hypot() PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
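*/

/* All the ifunc wrappers in this directory share one dispatch pattern,
   sketched here with hypothetical names fn/fn_generic/fn_power7:

       extern __typeof (fn) fn_generic attribute_hidden;
       extern __typeof (fn) fn_power7 attribute_hidden;

       libc_ifunc (fn, (hwcap & PPC_FEATURE_ARCH_2_06)
		       ? fn_power7 : fn_generic);

   libc_ifunc expands to an ifunc resolver, so the choice is made once,
   at relocation time, and later calls go straight to the selected
   variant.  */

/* The PowerPC64 fallback for hypot: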
*/ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypot __ieee754_hypot_ppc64 + +#include <sysdeps/powerpc/fpu/e_hypot.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c new file mode 100644 index 0000000000..6a3d60a830 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c @@ -0,0 +1,32 @@ +/* Multiple versions of ieee754_hypot. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ieee754_hypot) __ieee754_hypot_ppc64 attribute_hidden; +extern __typeof (__ieee754_hypot) __ieee754_hypot_power7 attribute_hidden; + +libc_ifunc (__ieee754_hypot, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __ieee754_hypot_power7 + : __ieee754_hypot_ppc64); + +strong_alias (__ieee754_hypot, __hypot_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c new file mode 100644 index 0000000000..70584863f7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c @@ -0,0 +1,19 @@ +/* __ieee_hypotf() POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c new file mode 100644 index 0000000000..839e94e56c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c @@ -0,0 +1,26 @@ +/* __ieee_hypot() PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c
new file mode 100644
index 0000000000..70584863f7
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c
@@ -0,0 +1,19 @@
+/* __ieee754_hypotf() POWER7 version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c
new file mode 100644
index 0000000000..839e94e56c
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c
@@ -0,0 +1,26 @@
+/* __ieee754_hypotf() PowerPC64 version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+
+#undef strong_alias
+#define strong_alias(a, b)
+
+#define __ieee754_hypotf __ieee754_hypotf_ppc64
+
+#include <sysdeps/powerpc/fpu/e_hypotf.c>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c
new file mode 100644
index 0000000000..2c8112d3b2
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c
@@ -0,0 +1,32 @@
+/* Multiple versions of ieee754_hypotf.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <math_ldbl_opt.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__ieee754_hypotf) __ieee754_hypotf_ppc64 attribute_hidden;
+extern __typeof (__ieee754_hypotf) __ieee754_hypotf_power7 attribute_hidden;
+
+libc_ifunc (__ieee754_hypotf,
+ (hwcap & PPC_FEATURE_ARCH_2_06)
+ ? __ieee754_hypotf_power7
+ : __ieee754_hypotf_ppc64);
+
+strong_alias (__ieee754_hypotf, __hypotf_finite)
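
The hwcap/hwcap2 words tested by these selectors originate in the auxiliary vector the kernel hands to every process (AT_HWCAP/AT_HWCAP2); init-arch.h merely exposes glibc's cached copies to the ifunc macros. Ordinary code can inspect the same bits:

#include <stdio.h>
#include <sys/auxv.h>
#include <asm/cputable.h>   /* PPC_FEATURE_* bit definitions (kernel header) */

int
main (void)
{
  unsigned long hwcap = getauxval (AT_HWCAP);
  unsigned long hwcap2 = getauxval (AT_HWCAP2);
  printf ("ISA 2.06 (POWER7 class): %s\n",
          (hwcap & PPC_FEATURE_ARCH_2_06) ? "yes" : "no");
  printf ("ISA 2.07 (POWER8 class): %s\n",
          (hwcap2 & PPC_FEATURE2_ARCH_2_07) ? "yes" : "no");
  return 0;
}
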
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-power5+.S
new file mode 100644
index 0000000000..bbea647da7
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-power5+.S
@@ -0,0 +1,31 @@
+/* ceil function. PowerPC64/power5+ version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef weak_alias
+#define weak_alias(a,b)
+#undef strong_alias
+#define strong_alias(a,b)
+#undef compat_symbol
+#define compat_symbol(a,b,c,d)
+
+#define __ceil __ceil_power5plus
+
+#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-ppc64.S
new file mode 100644
index 0000000000..c19bb42329
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-ppc64.S
@@ -0,0 +1,31 @@
+/* ceil function. PowerPC64 default version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef weak_alias
+#define weak_alias(a,b)
+#undef strong_alias
+#define strong_alias(a,b)
+#undef compat_symbol
+#define compat_symbol(a,b,c,d)
+
+#define __ceil __ceil_ppc64
+
+#include <sysdeps/powerpc/powerpc64/fpu/s_ceil.S>
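
What makes a dedicated power5+ build of ceil/floor worthwhile: ISA 2.03 added the Floating Round to Integer instructions (frip, frim, friz, frin), so the power5+ variants can round in a single instruction where the default version has to play the classic add-and-subtract-2^52 bit games. An untested GNU C sketch of the idea, not the actual assembly from s_ceil.S:

double
ceil_power5plus_sketch (double x)
{
  double r;
  /* frip: floating round to integer, toward +infinity (ISA 2.03).  */
  __asm__ ("frip %0,%1" : "=d" (r) : "d" (x));
  return r;
}
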
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil.c
new file mode 100644
index 0000000000..968e8cb17e
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil.c
@@ -0,0 +1,40 @@
+/* Multiple versions of ceil.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <math_ldbl_opt.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__ceil) __ceil_ppc64 attribute_hidden;
+extern __typeof (__ceil) __ceil_power5plus attribute_hidden;
+
+libc_ifunc (__ceil,
+ (hwcap & PPC_FEATURE_POWER5_PLUS)
+ ? __ceil_power5plus
+ : __ceil_ppc64);
+
+weak_alias (__ceil, ceil)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__ceil, __ceill)
+weak_alias (__ceil, ceill)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __ceil, ceill, GLIBC_2_0);
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-power5+.S
new file mode 100644
index 0000000000..8e875ce679
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-power5+.S
@@ -0,0 +1,26 @@
+/* ceilf function. PowerPC64/power5+ version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#undef weak_alias
+#define weak_alias(a,b)
+
+#define __ceilf __ceilf_power5plus
+
+#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-ppc64.S
new file mode 100644
index 0000000000..c9d31da2a7
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-ppc64.S
@@ -0,0 +1,26 @@
+/* ceilf function. PowerPC64 default version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#undef weak_alias
+#define weak_alias(a,b)
+
+#define __ceilf __ceilf_ppc64
+
+#include <sysdeps/powerpc/powerpc64/fpu/s_ceilf.S>
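
From the application's side all of this is invisible: the program calls ceil(), the dynamic loader runs the selector once when the symbol is resolved, and every subsequent call lands directly in the chosen variant:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* Resolves to __ceil_power5plus or __ceil_ppc64 behind the scenes.  */
  printf ("%.1f %.1f\n", ceil (1.5), ceil (-1.5));   /* prints: 2.0 -1.0 */
  return 0;
}
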
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <math_ldbl_opt.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__ceilf) __ceilf_ppc64 attribute_hidden;
+extern __typeof (__ceilf) __ceilf_power5plus attribute_hidden;
+
+libc_ifunc (__ceilf,
+ (hwcap & PPC_FEATURE_POWER5_PLUS)
+ ? __ceilf_power5plus
+ : __ceilf_ppc64);
+
+weak_alias (__ceilf, ceilf)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-power6.S
new file mode 100644
index 0000000000..d59fbfae73
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-power6.S
@@ -0,0 +1,33 @@
+/* copysign(). PowerPC64/POWER6 version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef weak_alias
+#define weak_alias(a,b)
+#undef strong_alias
+#define strong_alias(a,b)
+#undef compat_symbol
+#define compat_symbol(a, b, c, d)
+#undef hidden_def
+#define hidden_def(name)
+
+#define __copysign __copysign_power6
+
+#include <sysdeps/powerpc/powerpc64/fpu/s_copysign.S>
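
The hidden_def() juggling here concerns glibc's internal __GI_* aliases: calls made from inside libc/libm bind to these hidden names directly, bypassing the PLT and therefore the IFUNC. Only the baseline file (below) provides the __GI_ alias, so internal callers always reach the variant that is safe on every CPU. A rough user-level analogue of such a hidden alias:

double impl (double x) { return x; }

/* Internal name: resolved at static link time, never through the PLT.  */
extern __typeof (impl) impl_internal
  __attribute__ ((alias ("impl"), visibility ("hidden")));
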
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-ppc64.S
new file mode 100644
index 0000000000..3f4fbc96ad
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-ppc64.S
@@ -0,0 +1,35 @@
+/* copysign(). PowerPC64 default version.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef weak_alias
+#define weak_alias(a,b)
+#undef strong_alias
+#define strong_alias(a,b)
+#undef compat_symbol
+#define compat_symbol(a, b, c, d)
+
+#define __copysign __copysign_ppc64
+#undef hidden_def
+#define hidden_def(name) \
+ strong_alias (__copysign_ppc64, __GI___copysign)
+
+#include <sysdeps/powerpc/powerpc64/fpu/s_copysign.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign.c
new file mode 100644
index 0000000000..2bfb625bf7
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign.c
@@ -0,0 +1,51 @@
+/* Multiple versions of copysign.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Redefine copysign so that the compiler won't complain about the type
+ mismatch with the IFUNC selector in strong_alias below. */
+#undef __copysign
+#define __copysign __redirect_copysign
+#include <math.h>
+#include <math_ldbl_opt.h>
+#undef __copysign
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__redirect_copysign) __copysign_ppc64 attribute_hidden;
+extern __typeof (__redirect_copysign) __copysign_power6 attribute_hidden;
+
+extern __typeof (__redirect_copysign) __libm_copysign;
+libc_ifunc (__libm_copysign,
+ (hwcap & PPC_FEATURE_ARCH_2_05)
+ ? __copysign_power6
+ : __copysign_ppc64);
+
+strong_alias (__libm_copysign, __copysign)
+weak_alias (__copysign, copysign)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__copysign,copysignl)
+strong_alias(__copysign,__copysignl)
+#endif
+#if IS_IN (libm)
+# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __copysign, copysignl, GLIBC_2_0);
+# endif
+#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __copysign, copysignl, GLIBC_2_0);
+#endif
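
The __redirect_copysign shuffle in s_copysign.c exists because math.h's declaration of __copysign can clash with the object the IFUNC macro wants to emit; renaming the header's declaration out of the way, defining the selector under a private name, and aliasing the public names onto it afterwards sidesteps the conflict. The bare shape of the trick, with placeholder names:

#define mycopysign __redirect_mycopysign
double mycopysign (double, double);   /* stands in for the header's declaration */
#undef mycopysign

/* Define the real entry point under a fresh name with the declared type...  */
double mycopysign_selected (double x, double y)
{ return __builtin_copysign (x, y); }

/* ...then hang the public name on it.  */
extern __typeof (__redirect_mycopysign) mycopysign
  __attribute__ ((alias ("mycopysign_selected")));
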
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysignf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysignf.c
new file mode 100644
index 0000000000..c9be2b6811
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysignf.c
@@ -0,0 +1,32 @@
+/* Multiple versions of copysignf.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+/* It's safe to use double-precision implementation for single-precision. */
+extern __typeof (__copysignf) __copysign_ppc64 attribute_hidden;
+extern __typeof (__copysignf) __copysign_power6 attribute_hidden;
+
+libc_ifunc (__copysignf,
+ (hwcap & PPC_FEATURE_ARCH_2_05)
+ ? __copysign_power6
+ : __copysign_ppc64);
+
+weak_alias (__copysignf, copysignf)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-power8.S
new file mode 100644
index 0000000000..ee00a2c43a
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-power8.S
@@ -0,0 +1,26 @@
+/* cosf function. PowerPC64/power8 version.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#undef weak_alias
+#define weak_alias(a,b)
+
+#define __cosf __cosf_power8
+
+#include <sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-ppc64.c
new file mode 100644
index 0000000000..635624c538
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-ppc64.c
@@ -0,0 +1,26 @@
+/* cosf function. PowerPC64 default version.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#undef weak_alias
+#define weak_alias(a, b)
+
+#define __cosf __cosf_ppc64
+
+#include <sysdeps/powerpc/fpu/s_cosf.c>
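
The comment in s_copysignf.c above ("it's safe to use the double-precision implementation") holds because copysign is pure bit surgery: the result is x's magnitude bits with y's sign bit, and the sign bit sits in the same position whether the value arrived as a promoted float or a genuine double. A bit-level sketch of the operation:

#include <stdint.h>
#include <string.h>

double
copysign_sketch (double x, double y)
{
  uint64_t ux, uy;
  memcpy (&ux, &x, sizeof ux);
  memcpy (&uy, &y, sizeof uy);
  ux = (ux & ~(1ULL << 63)) | (uy & (1ULL << 63));   /* splice y's sign onto x */
  memcpy (&x, &ux, sizeof x);
  return x;
}
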
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf.c
new file mode 100644
index 0000000000..acf2a59d69
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf.c
@@ -0,0 +1,31 @@
+/* Multiple versions of cosf.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__cosf) __cosf_ppc64 attribute_hidden;
+extern __typeof (__cosf) __cosf_power8 attribute_hidden;
+
+libc_ifunc (__cosf,
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __cosf_power8
+ : __cosf_ppc64);
+
+weak_alias (__cosf, cosf)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power7.S
new file mode 100644
index 0000000000..9220383ee6
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power7.S
@@ -0,0 +1,33 @@
+/* finite(). PowerPC64/POWER7 version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef hidden_def
+#define hidden_def(name)
+#undef weak_alias
+#define weak_alias(name, alias)
+#undef strong_alias
+#define strong_alias(name, alias)
+#undef compat_symbol
+#define compat_symbol(lib, name, symbol, ver)
+
+#define __finite __finite_power7
+
+#include <sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S
new file mode 100644
index 0000000000..fa878ab3e1
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S
@@ -0,0 +1,33 @@
+/* finite(). PowerPC64/POWER8 version.
+ Copyright (C) 2014-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef hidden_def
+#define hidden_def(name)
+#undef weak_alias
+#define weak_alias(name, alias)
+#undef strong_alias
+#define strong_alias(name, alias)
+#undef compat_symbol
+#define compat_symbol(lib, name, symbol, ver)
+
+#define __finite __finite_power8
+
+#include <sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-ppc64.c
new file mode 100644
index 0000000000..fabd9b0e3d
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-ppc64.c
@@ -0,0 +1,34 @@
+/* finite(). PowerPC64 default version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+
+#undef weak_alias
+#define weak_alias(a, b)
+#undef strong_alias
+#define strong_alias(a, b)
+
+#define FINITE __finite_ppc64
+#ifdef SHARED
+# undef hidden_def
+# define hidden_def(a) \
+ __hidden_ver1 (__finite_ppc64, __GI___finite, __finite_ppc64);
+#endif
+
+#include <sysdeps/ieee754/dbl-64/s_finite.c>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c
new file mode 100644
index 0000000000..a5ec36b72f
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c
@@ -0,0 +1,60 @@
+/* Multiple versions of finite.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define __finite __redirect___finite
+#define __finitef __redirect___finitef
+#define __finitel __redirect___finitel
+#include <math.h>
+#include <math_ldbl_opt.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__finite) __finite_ppc64 attribute_hidden;
+extern __typeof (__finite) __finite_power7 attribute_hidden;
+extern __typeof (__finite) __finite_power8 attribute_hidden;
+#undef __finite
+#undef __finitef
+#undef __finitel
+
+libc_ifunc_redirected (__redirect___finite, __finite,
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __finite_power8
+ : (hwcap & PPC_FEATURE_ARCH_2_06)
+ ? __finite_power7
+ : __finite_ppc64);
+
+weak_alias (__finite, finite)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__finite, __finitel)
+weak_alias (__finite, finitel)
+#endif
+
+#if IS_IN (libm)
+# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0)
+compat_symbol (libm, finite, finitel, GLIBC_2_0);
+# endif
+# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1)
+compat_symbol (libm, __finite, __finitel, GLIBC_2_1);
+# endif
+#else
+# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0)
+compat_symbol (libc, __finite, __finitel, GLIBC_2_0);
+compat_symbol (libc, finite, finitel, GLIBC_2_0);
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef-ppc64.c
new file mode 100644
index 0000000000..c6f8033e6e
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef-ppc64.c
@@ -0,0 +1,32 @@
+/* finitef(). PowerPC64 default version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+
+#undef weak_alias
+#define weak_alias(a, b)
+
+#define FINITEF __finitef_ppc64
+#ifdef SHARED
+# undef hidden_def
+# define hidden_def(a) \
+ __hidden_ver1 (__finitef_ppc64, __GI___finitef, __finitef_ppc64);
+#endif
+
+#include <sysdeps/ieee754/flt-32/s_finitef.c>
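
s_finite.c above (and s_isinf.c/s_isnan.c further down) use libc_ifunc_redirected, which combines the header-redirect trick with a cascaded selector: newest ISA first, falling through to the portable build. Conceptually the resolver is just the following, with stub bodies standing in for the real variants:

#include <sys/auxv.h>
#include <asm/cputable.h>   /* PPC_FEATURE* bits (kernel header) */

static int finite_generic (double x) { return __builtin_isfinite (x); }
static int finite_power7 (double x) { return __builtin_isfinite (x); }
static int finite_power8 (double x) { return __builtin_isfinite (x); }

static __typeof (&finite_generic)
finite_resolver (void)
{
  unsigned long hwcap = getauxval (AT_HWCAP);
  unsigned long hwcap2 = getauxval (AT_HWCAP2);
  if (hwcap2 & PPC_FEATURE2_ARCH_2_07)   /* POWER8 first...  */
    return finite_power8;
  if (hwcap & PPC_FEATURE_ARCH_2_06)     /* ...then POWER7...  */
    return finite_power7;
  return finite_generic;                 /* ...else the portable code.  */
}
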
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c
new file mode 100644
index 0000000000..cdd7824efb
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c
@@ -0,0 +1,37 @@
+/* Multiple versions of finitef.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define __finitef __redirect___finitef
+#include <math.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__finitef) __finitef_ppc64 attribute_hidden;
+/* The double-precision version also works for single-precision. */
+extern __typeof (__finitef) __finite_power7 attribute_hidden;
+extern __typeof (__finitef) __finite_power8 attribute_hidden;
+#undef __finitef
+
+libc_ifunc_redirected (__redirect___finitef, __finitef,
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __finite_power8
+ : (hwcap & PPC_FEATURE_ARCH_2_06)
+ ? __finite_power7
+ : __finitef_ppc64);
+
+weak_alias (__finitef, finitef)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-power5+.S
new file mode 100644
index 0000000000..24f2460693
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-power5+.S
@@ -0,0 +1,31 @@
+/* floor function. PowerPC64/power5+ version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef weak_alias
+#define weak_alias(a,b)
+#undef strong_alias
+#define strong_alias(a,b)
+#undef compat_symbol
+#define compat_symbol(a,b,c,d)
+
+#define __floor __floor_power5plus
+
+#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-ppc64.S
new file mode 100644
index 0000000000..5ec9a33d89
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-ppc64.S
@@ -0,0 +1,31 @@
+/* floor function. PowerPC64 default version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef weak_alias
+#define weak_alias(a,b)
+#undef strong_alias
+#define strong_alias(a,b)
+#undef compat_symbol
+#define compat_symbol(a,b,c,d)
+
+#define __floor __floor_ppc64
+
+#include <sysdeps/powerpc/powerpc64/fpu/s_floor.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor.c
new file mode 100644
index 0000000000..6ab7a35490
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor.c
@@ -0,0 +1,40 @@
+/* Multiple versions of floor.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <math_ldbl_opt.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__floor) __floor_ppc64 attribute_hidden;
+extern __typeof (__floor) __floor_power5plus attribute_hidden;
+
+libc_ifunc (__floor,
+ (hwcap & PPC_FEATURE_POWER5_PLUS)
+ ? __floor_power5plus
+ : __floor_ppc64);
+
+weak_alias (__floor, floor)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__floor, __floorl)
+weak_alias (__floor, floorl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __floor, floorl, GLIBC_2_0);
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-power5+.S
new file mode 100644
index 0000000000..8b621de68e
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-power5+.S
@@ -0,0 +1,26 @@
+/* floorf function. PowerPC64/power5+ version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#undef weak_alias
+#define weak_alias(a,b)
+
+#define __floorf __floorf_power5plus
+
+#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-ppc64.S
new file mode 100644
index 0000000000..3feea6e162
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-ppc64.S
@@ -0,0 +1,27 @@
+/* floorf function. PowerPC64 default version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef weak_alias
+#define weak_alias(a,b)
+
+#define __floorf __floorf_ppc64
+
+#include <sysdeps/powerpc/powerpc64/fpu/s_floorf.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf.c
new file mode 100644
index 0000000000..ee96536247
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf.c
@@ -0,0 +1,32 @@
+/* Multiple versions of floorf.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <math_ldbl_opt.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__floorf) __floorf_ppc64 attribute_hidden;
+extern __typeof (__floorf) __floorf_power5plus attribute_hidden;
+
+libc_ifunc (__floorf,
+ (hwcap & PPC_FEATURE_POWER5_PLUS)
+ ? __floorf_power5plus
+ : __floorf_ppc64);
+
+weak_alias (__floorf, floorf)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power7.S
new file mode 100644
index 0000000000..33a7e3de1a
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power7.S
@@ -0,0 +1,33 @@
+/* isinf(). PowerPC64/POWER7 version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef hidden_def
+#define hidden_def(name)
+#undef weak_alias
+#define weak_alias(name, alias)
+#undef strong_alias
+#define strong_alias(name, alias)
+#undef compat_symbol
+#define compat_symbol(lib, name, alias, ver)
+
+#define __isinf __isinf_power7
+
+#include <sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power8.S
new file mode 100644
index 0000000000..b630696927
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power8.S
@@ -0,0 +1,33 @@
+/* isinf(). PowerPC64/POWER8 version.
+ Copyright (C) 2014-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef hidden_def
+#define hidden_def(name)
+#undef weak_alias
+#define weak_alias(name, alias)
+#undef strong_alias
+#define strong_alias(name, alias)
+#undef compat_symbol
+#define compat_symbol(lib, name, alias, ver)
+
+#define __isinf __isinf_power8
+
+#include <sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-ppc64.c
new file mode 100644
index 0000000000..e7f64438b4
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-ppc64.c
@@ -0,0 +1,33 @@
+/* isinf(). PowerPC64 default version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+
+#undef weak_alias
+#define weak_alias(a, b)
+#undef strong_alias
+#define strong_alias(a, b)
+
+#define __isinf __isinf_ppc64
+#ifdef SHARED
+# undef hidden_def
+# define hidden_def(a) \
+ __hidden_ver1 (__isinf_ppc64, __GI___isinf, __isinf_ppc64);
+#endif
+
+#include <sysdeps/ieee754/dbl-64/s_isinf.c>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c
new file mode 100644
index 0000000000..9c6789c7bd
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c
@@ -0,0 +1,53 @@
+/* Multiple versions of isinf.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define __isinf __redirect___isinf
+#define __isinff __redirect___isinff
+#define __isinfl __redirect___isinfl
+#include <math.h>
+#include <math_ldbl_opt.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__isinf) __isinf_ppc64 attribute_hidden;
+extern __typeof (__isinf) __isinf_power7 attribute_hidden;
+extern __typeof (__isinf) __isinf_power8 attribute_hidden;
+#undef __isinf
+#undef __isinff
+#undef __isinfl
+
+libc_ifunc_redirected (__redirect___isinf, __isinf,
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __isinf_power8
+ : (hwcap & PPC_FEATURE_ARCH_2_06)
+ ? __isinf_power7
+ : __isinf_ppc64);
+
+weak_alias (__isinf, isinf)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isinf, __isinfl)
+weak_alias (__isinf, isinfl)
+#endif
+
+#if !IS_IN (libm)
+# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0)
+compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0);
+compat_symbol (libc, isinf, isinfl, GLIBC_2_0);
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff-ppc64.c
new file mode 100644
index 0000000000..e58e0b53be
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff-ppc64.c
@@ -0,0 +1,31 @@
+/* isinff(). PowerPC64 default version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+
+#undef weak_alias
+#define weak_alias(a, b)
+
+#define __isinff __isinff_ppc64
+#ifdef SHARED
+# undef hidden_def
+# define hidden_def(a) \
+ __hidden_ver1 (__isinff_ppc64, __GI___isinff, __isinff_ppc64);
+#endif
+
+#include <sysdeps/ieee754/flt-32/s_isinff.c>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c
new file mode 100644
index 0000000000..439e0b80d1
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c
@@ -0,0 +1,38 @@
+/* Multiple versions of isinff.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define __isinff __redirect___isinff
+#include <math.h>
+#include <math_ldbl_opt.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__isinff) __isinff_ppc64 attribute_hidden;
+/* The double-precision version also works for single-precision. */
+extern __typeof (__isinff) __isinf_power7 attribute_hidden;
+extern __typeof (__isinff) __isinf_power8 attribute_hidden;
+#undef __isinff
+
+libc_ifunc_redirected (__redirect___isinff, __isinff,
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __isinf_power8
+ : (hwcap & PPC_FEATURE_ARCH_2_06)
+ ? __isinf_power7
+ : __isinff_ppc64);
+
+weak_alias (__isinff, isinff)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power5.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power5.S
new file mode 100644
index 0000000000..18d368a63b
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power5.S
@@ -0,0 +1,33 @@
+/* isnan(). PowerPC64/POWER5 version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef hidden_def
+#define hidden_def(name)
+#undef weak_alias
+#define weak_alias(name, alias)
+#undef strong_alias
+#define strong_alias(name, alias)
+#undef compat_symbol
+#define compat_symbol(lib, name, symbol, ver)
+
+#define __isnan __isnan_power5
+
+#include <sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6.S
new file mode 100644
index 0000000000..7f0eae0430
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6.S
@@ -0,0 +1,33 @@
+/* isnan(). PowerPC64/POWER6 version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef hidden_def
+#define hidden_def(name)
+#undef weak_alias
+#define weak_alias(name, alias)
+#undef strong_alias
+#define strong_alias(name, alias)
+#undef compat_symbol
+#define compat_symbol(lib, name, symbol, ver)
+
+#define __isnan __isnan_power6
+
+#include <sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6x.S
new file mode 100644
index 0000000000..aa283096ae
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6x.S
@@ -0,0 +1,33 @@
+/* isnan(). PowerPC64/POWER6X version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef hidden_def
+#define hidden_def(name)
+#undef weak_alias
+#define weak_alias(name, alias)
+#undef strong_alias
+#define strong_alias(name, alias)
+#undef compat_symbol
+#define compat_symbol(lib, name, symbol, ver)
+
+#define __isnan __isnan_power6x
+
+#include <sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power7.S
new file mode 100644
index 0000000000..b67d58e2ea
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power7.S
@@ -0,0 +1,33 @@
+/* isnan(). PowerPC64/POWER7 version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef hidden_def
+#define hidden_def(name)
+#undef weak_alias
+#define weak_alias(name, alias)
+#undef strong_alias
+#define strong_alias(name, alias)
+#undef compat_symbol
+#define compat_symbol(lib, name, symbol, ver)
+
+#define __isnan __isnan_power7
+
+#include <sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S
new file mode 100644
index 0000000000..03151b3087
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S
@@ -0,0 +1,33 @@
+/* isnan(). PowerPC64/POWER8 version.
+ Copyright (C) 2014-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef hidden_def
+#define hidden_def(name)
+#undef weak_alias
+#define weak_alias(name, alias)
+#undef strong_alias
+#define strong_alias(name, alias)
+#undef compat_symbol
+#define compat_symbol(lib, name, symbol, ver)
+
+#define __isnan __isnan_power8
+
+#include <sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S>
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-ppc64.S
new file mode 100644
index 0000000000..ee219c14be
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-ppc64.S
@@ -0,0 +1,32 @@
+/* isnan(). PowerPC64 default version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#undef weak_alias
+#define weak_alias(a,b)
+#undef strong_alias
+#define strong_alias(a,b)
+
+#define __isnan __isnan_ppc64
+#undef hidden_def
+#define hidden_def(name) \
+ .globl __GI___isnan ; .set __GI___isnan,__isnan_ppc64
+
+#include <sysdeps/powerpc/powerpc64/fpu/s_isnan.S>
*/ + +#define __isnan __redirect___isnan +#define __isnanf __redirect___isnanf +#define __isnanl __redirect___isnanl +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isnan) __isnan_ppc64 attribute_hidden; +extern __typeof (__isnan) __isnan_power5 attribute_hidden; +extern __typeof (__isnan) __isnan_power6 attribute_hidden; +extern __typeof (__isnan) __isnan_power6x attribute_hidden; +extern __typeof (__isnan) __isnan_power7 attribute_hidden; +extern __typeof (__isnan) __isnan_power8 attribute_hidden; +#undef __isnan +#undef __isnanf +#undef __isnanl + +libc_ifunc_redirected (__redirect___isnan, __isnan, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isnan_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isnan_power7 + : (hwcap & PPC_FEATURE_POWER6_EXT) + ? __isnan_power6x + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __isnan_power6 + : (hwcap & PPC_FEATURE_POWER5) + ? __isnan_power5 + : __isnan_ppc64); + +weak_alias (__isnan, isnan) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c new file mode 100644 index 0000000000..958c373245 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c @@ -0,0 +1,44 @@ +/* Multiple versions of isnanf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +/* The double-precision implementation also works for the single one. */ +extern __typeof (__isnanf) __isnan_ppc64 attribute_hidden; +extern __typeof (__isnanf) __isnan_power5 attribute_hidden; +extern __typeof (__isnanf) __isnan_power6 attribute_hidden; +extern __typeof (__isnanf) __isnan_power6x attribute_hidden; +extern __typeof (__isnanf) __isnan_power7 attribute_hidden; +extern __typeof (__isnanf) __isnan_power8 attribute_hidden; + +libc_ifunc_hidden (__isnanf, __isnanf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isnan_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isnan_power7 + : (hwcap & PPC_FEATURE_POWER6_EXT) + ? __isnan_power6x + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __isnan_power6 + : (hwcap & PPC_FEATURE_POWER5) + ? 
__isnan_power5 + : __isnan_ppc64); + +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power6x.S new file mode 100644 index 0000000000..f9b1616be3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power6x.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC64/POWER6X version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_power6x + +#include <sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power8.S new file mode 100644 index 0000000000..b7f5276a66 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power8.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC64/POWER8 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-ppc64.S new file mode 100644 index 0000000000..b92dafbcdb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-ppc64.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
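All of the selector files in this directory follow the same shape: each CPU-specific object exports a suffixed symbol (__isnan_power7, __llrint_power8, ...), and a small C file picks one of them at load time from the AT_HWCAP/AT_HWCAP2 bits the kernel publishes. Outside glibc the same mechanism is reachable through GCC's ifunc attribute; the following stand-alone sketch mirrors the __isnan dispatch under stated assumptions (my_isnan, my_isnan_power8, and resolve_my_isnan are hypothetical names; the fallback hwcap constant is the value documented in Linux's <asm/cputable.h>; calling getauxval from a resolver is safe for an ordinary dynamically linked program on a reasonably recent glibc):

#include <stdio.h>
#include <sys/auxv.h>                /* getauxval, AT_HWCAP2 */

#ifndef PPC_FEATURE2_ARCH_2_07
# define PPC_FEATURE2_ARCH_2_07 0x80000000UL   /* assumed value; see <asm/cputable.h> */
#endif

static int my_isnan_generic (double x) { return x != x; }
/* Stand-in for the POWER8 variant; a real one would use ISA 2.07 insns.  */
static int my_isnan_power8 (double x) { return x != x; }

/* The resolver runs once, when the dynamic linker relocates my_isnan,
   and returns the implementation that every later call will use.  */
static int (*resolve_my_isnan (void)) (double)
{
  return (getauxval (AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
	 ? my_isnan_power8 : my_isnan_generic;
}

int my_isnan (double x) __attribute__ ((ifunc ("resolve_my_isnan")));

int main (void)
{
  printf ("%d %d\n", my_isnan (__builtin_nan ("")), my_isnan (1.0));
  return 0;
}

glibc itself cannot call getauxval this early in startup, which is presumably why the libc_ifunc machinery used above reads hwcap/hwcap2 values the dynamic linker has already cached (see "init-arch.h").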
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c new file mode 100644 index 0000000000..8db494cfde --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c @@ -0,0 +1,60 @@ +/* Multiple versions of llrint. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Redefine lrint/__lrint so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias below. */ +#define lrint __hidden_lrint +#define __lrint __hidden___lrint + +#include <math.h> +#include <math_ldbl_opt.h> +#undef lrint +#undef __lrint +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__llrint) __llrint_ppc64 attribute_hidden; +extern __typeof (__llrint) __llrint_power6x attribute_hidden; +extern __typeof (__llrint) __llrint_power8 attribute_hidden; + +libc_ifunc (__llrint, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __llrint_power8 : + (hwcap & PPC_FEATURE_POWER6_EXT) + ? __llrint_power6x + : __llrint_ppc64); + +weak_alias (__llrint, llrint) +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1); +#endif + +/* long has the same width as long long on PowerPC64. 
*/ +strong_alias (__llrint, __lrint) +weak_alias (__lrint, lrint) +#ifdef NO_LONG_DOUBLE +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power5+.S new file mode 100644 index 0000000000..b8305ce968 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power5+.S @@ -0,0 +1,32 @@ +/* llround(). PowerPC64/POWER5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llround __llround_power5plus +#define __lround __lround_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power6x.S new file mode 100644 index 0000000000..1145aff2a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power6x.S @@ -0,0 +1,32 @@ +/* llround(). PowerPC64/POWER6X version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __llround __llround_power6x +#define __lround __lround_power6x + +#include <sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S new file mode 100644 index 0000000000..8d6190df89 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S @@ -0,0 +1,31 @@ +/* llround(). PowerPC64/POWER8 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __llround __llround_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-ppc64.S new file mode 100644 index 0000000000..8e6f9aed78 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-ppc64.S @@ -0,0 +1,28 @@ +/* llround(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llround __llround_ppc64 +#define __lround __lround_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c new file mode 100644 index 0000000000..cb1a446158 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c @@ -0,0 +1,63 @@ +/* Multiple versions of llround. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define lround __hidden_lround +#define __lround __hidden___lround + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__llround) __llround_ppc64 attribute_hidden; +extern __typeof (__llround) __llround_power5plus attribute_hidden; +extern __typeof (__llround) __llround_power6x attribute_hidden; +extern __typeof (__llround) __llround_power8 attribute_hidden; + +libc_ifunc (__llround, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __llround_power8 : + (hwcap & PPC_FEATURE_POWER6_EXT) + ? __llround_power6x : + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __llround_power5plus + : __llround_ppc64); + +weak_alias (__llround, llround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1); +compat_symbol (libm, llround, lroundl, GLIBC_2_1); +#endif + +/* long has the same width as long long on PPC64. */ +#undef lround +#undef __lround +strong_alias (__llround, __lround) +weak_alias (__llround, lround) +#ifdef NO_LONG_DOUBLE +strong_alias (__lround, __lroundl) +weak_alias (__lround, lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-power7.c new file mode 100644 index 0000000000..73b5e2d48c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-power7.c @@ -0,0 +1,19 @@ +/* logb(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-ppc64.c new file mode 100644 index 0000000000..e428b9a29a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-ppc64.c @@ -0,0 +1,28 @@ +/* logb(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define __logb __logb_ppc64 + +#include <sysdeps/ieee754/dbl-64/s_logb.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb.c new file mode 100644 index 0000000000..d70919e3d4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb.c @@ -0,0 +1,41 @@ +/* Multiple versions of logb. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logb) __logb_ppc64 attribute_hidden; +extern __typeof (__logb) __logb_power7 attribute_hidden; + +libc_ifunc (__logb, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logb_power7 + : __logb_ppc64); + +weak_alias (__logb, logb) + +#ifdef NO_LONG_DOUBLE +strong_alias (__logb, __logbl) +weak_alias (__logb, logbl) +#endif + +#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, logb, logbl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-power7.c new file mode 100644 index 0000000000..02e04318e5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-power7.c @@ -0,0 +1,19 @@ +/* logbf(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
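The -ppc64 fallback files above are all built with the same rename-and-include idiom: suppress the alias-generating macros, #define the public symbol to a CPU-suffixed private name, then #include the generic implementation unchanged. A compressed, self-contained illustration of that idiom (my_logb, my_logb_ppc64, and MY_WEAK_ALIAS are hypothetical stand-ins for the glibc names, and the generic body is inlined here instead of included from a second file):

#include <math.h>
#include <stdio.h>

/* The ifunc selector owns the public name, so the alias-generating
   macro is stubbed out before the generic code is compiled.  */
#define MY_WEAK_ALIAS(name, alias)
/* Compile the generic body under the CPU-suffixed name.  */
#define my_logb my_logb_ppc64

/* ---- stand-in for '#include <generic/s_logb.c>' ---- */
double my_logb (double x) { return logb (x); }
MY_WEAK_ALIAS (my_logb, public_logb)
/* ----------------------------------------------------- */

#undef my_logb

int main (void)
{
  /* The generic body is now reachable only as my_logb_ppc64.  */
  printf ("%g\n", my_logb_ppc64 (8.0));   /* prints 3 */
  return 0;
}

The payoff is that one generic source file can be compiled several times, once per CPU level, without any edits to the shared code.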
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-ppc64.c new file mode 100644 index 0000000000..147b710c73 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-ppc64.c @@ -0,0 +1,26 @@ +/* logbf(). PowerPC64 default implementation. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __logbf __logbf_ppc64 + +#include <sysdeps/ieee754/flt-32/s_logbf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf.c new file mode 100644 index 0000000000..1cacc8a950 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf.c @@ -0,0 +1,32 @@ +/* Multiple versions of logbf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logbf) __logbf_ppc64 attribute_hidden; +extern __typeof (__logbf) __logbf_power7 attribute_hidden; + +libc_ifunc (__logbf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? 
__logbf_power7 + : __logbf_ppc64); + +weak_alias (__logbf, logbf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-power7.c new file mode 100644 index 0000000000..60ec533b8e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-power7.c @@ -0,0 +1,19 @@ +/* logbl(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-ppc64.c new file mode 100644 index 0000000000..502410f877 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-ppc64.c @@ -0,0 +1,21 @@ +/* logbl(). PowerPC64 default version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __logbl __logbl_ppc64 + +#include <sysdeps/ieee754/ldbl-128ibm/s_logbl.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl.c new file mode 100644 index 0000000000..63b9c812e2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl.c @@ -0,0 +1,32 @@ +/* Multiple versions of logbl. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logbl) __logbl_ppc64 attribute_hidden; +extern __typeof (__logbl) __logbl_power7 attribute_hidden; + +libc_ifunc (__logbl, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logbl_power7 + : __logbl_ppc64); + +long_double_symbol (libm, __logbl, logbl); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lrint.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lrint.c new file mode 100644 index 0000000000..d09286267b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lrint.c @@ -0,0 +1 @@ + /* __lrint is in s_llrint.c */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lround.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lround.c new file mode 100644 index 0000000000..0dab5443e2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lround.c @@ -0,0 +1 @@ +/* __lround is in s_llround.c */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-power5+.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-power5+.c new file mode 100644 index 0000000000..c923f84d97 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-power5+.c @@ -0,0 +1,19 @@ +/* PowerPC/POWER5+ implementation for modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-ppc64.c new file mode 100644 index 0000000000..43318ee4dd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-ppc64.c @@ -0,0 +1,29 @@ +/* PowerPC64 default implementation for modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
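The one-line s_lrint.c and s_lround.c stubs above exist because, as the comments in s_llrint.c and s_llround.c note, long and long long are both 64 bits on PowerPC64, so a single body can be exported under both names. A sketch of that aliasing with hypothetical names (my_llrint, my_lrint); GCC may warn about the prototype mismatch (-Wattribute-alias), which is exactly why the real s_llrint.c redirects __lrint away before including <math.h>:

#include <math.h>
#include <stdio.h>

long long my_llrint (double x) { return llrint (x); }

/* Valid only because both integer types are 64 bits on this ABI.  */
_Static_assert (sizeof (long) == sizeof (long long),
		"alias is only sound when the widths match");
extern long my_lrint (double) __attribute__ ((alias ("my_llrint")));

int main (void)
{
  /* Both names reach the same machine code.  */
  printf ("%ld %lld\n", my_lrint (2.5), my_llrint (2.5));
  return 0;
}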
*/ + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) + +#define __modf __modf_ppc64 + +#include <sysdeps/ieee754/dbl-64/s_modf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf.c new file mode 100644 index 0000000000..3e79b2bd5a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf.c @@ -0,0 +1,44 @@ +/* Multiple versions of modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__modf) __modf_ppc64 attribute_hidden; +extern __typeof (__modf) __modf_power5plus attribute_hidden; + +libc_ifunc (__modf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __modf_power5plus + : __modf_ppc64); + +weak_alias (__modf, modf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__modf, __modfl) +weak_alias (__modf, modfl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __modf, modfl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __modf, modfl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-power5+.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-power5+.c new file mode 100644 index 0000000000..22dbf5341e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-power5+.c @@ -0,0 +1,19 @@ +/* PowerPC/POWER5+ implementation for modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-ppc64.c new file mode 100644 index 0000000000..6fc97f0114 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-ppc64.c @@ -0,0 +1,26 @@ +/* PowerPC64 default implementation for modff. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __modff __modff_ppc64 + +#include <sysdeps/ieee754/flt-32/s_modff.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff.c new file mode 100644 index 0000000000..f57939cc66 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff.c @@ -0,0 +1,30 @@ +/* Multiple versions of modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +extern __typeof (__modff) __modff_ppc64 attribute_hidden; +extern __typeof (__modff) __modff_power5plus attribute_hidden; + +libc_ifunc (__modff, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __modff_power5plus + : __modff_ppc64); + +weak_alias (__modff, modff) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-power5+.S new file mode 100644 index 0000000000..a7c7492f21 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-power5+.S @@ -0,0 +1,31 @@ +/* round function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __round __round_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-ppc64.S new file mode 100644 index 0000000000..44a2b0105a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-ppc64.S @@ -0,0 +1,31 @@ +/* round function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __round __round_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_round.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round.c new file mode 100644 index 0000000000..d440f6f45c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round.c @@ -0,0 +1,40 @@ +/* Multiple versions of round. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__round) __round_ppc64 attribute_hidden; +extern __typeof (__round) __round_power5plus attribute_hidden; + +libc_ifunc (__round, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? 
__round_power5plus + : __round_ppc64); + +weak_alias (__round, round) + +#ifdef NO_LONG_DOUBLE +strong_alias (__round, __roundl) +weak_alias (__round, roundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __round, roundl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-power5+.S new file mode 100644 index 0000000000..81501a1547 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-power5+.S @@ -0,0 +1,26 @@ +/* roundf function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __roundf __roundf_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-ppc64.S new file mode 100644 index 0000000000..8f3b24c556 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-ppc64.S @@ -0,0 +1,26 @@ +/* roundf function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __roundf __roundf_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_roundf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf.c new file mode 100644 index 0000000000..09609d3e91 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf.c @@ -0,0 +1,32 @@ +/* Multiple versions of roundf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
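Every file in this section leans on glibc's weak_alias/strong_alias macros, which (per include/libc-symbols.h) expand to GCC alias attributes roughly like the sketch below; my_round and the alias names are hypothetical:

#include <math.h>
#include <stdio.h>

double my_round (double x) { return round (x); }

/* strong alias: a second, normally bound name for the same code.  */
extern __typeof (my_round) my_round2 __attribute__ ((alias ("my_round")));
/* weak alias: the same, but overridable by a strong definition elsewhere.  */
extern __typeof (my_round) round_alias __attribute__ ((weak, alias ("my_round")));

int main (void)
{
  printf ("%g %g %g\n", my_round (2.5), my_round2 (2.5), round_alias (2.5));
  return 0;
}

This is why the CPU-specific .S files above stub the macros out first: without that, every variant would try to define the same public alias and the link would fail with duplicate symbols.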
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__roundf) __roundf_ppc64 attribute_hidden; +extern __typeof (__roundf) __roundf_power5plus attribute_hidden; + +libc_ifunc (__roundf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __roundf_power5plus + : __roundf_ppc64); + +weak_alias (__roundf, roundf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S new file mode 100644 index 0000000000..3d01533da8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S @@ -0,0 +1,26 @@ +/* sinf(). PowerPC64/POWER8 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __sinf __sinf_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c new file mode 100644 index 0000000000..83e37f92c6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c @@ -0,0 +1,26 @@ +/* sinf(). PowerPC64 default version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __sinf __sinf_ppc64 + +#include <sysdeps/ieee754/flt-32/s_sinf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c new file mode 100644 index 0000000000..6d7d6ce50d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c @@ -0,0 +1,31 @@ +/* Multiple versions of sinf. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__sinf) __sinf_ppc64 attribute_hidden; +extern __typeof (__sinf) __sinf_power8 attribute_hidden; + +libc_ifunc (__sinf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __sinf_power8 + : __sinf_ppc64); + +weak_alias (__sinf, sinf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-power5+.S new file mode 100644 index 0000000000..53d8cd5013 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-power5+.S @@ -0,0 +1,31 @@ +/* trunc function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __trunc __trunc_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-ppc64.S new file mode 100644 index 0000000000..36e8fd05c2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-ppc64.S @@ -0,0 +1,31 @@ +/* trunc function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
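With s_sinf.c the section has now shown selectors keyed on five different hwcap/hwcap2 bits. A small diagnostic like the one below prints which of those bits the running kernel reports, i.e. which variants the selectors would pick; the fallback constants are assumed values from Linux's <asm/cputable.h>, and the real header should be preferred on powerpc:

#include <stdio.h>
#include <sys/auxv.h>

#ifndef PPC_FEATURE_POWER5_PLUS
# define PPC_FEATURE_POWER5_PLUS 0x00020000   /* assumed */
#endif
#ifndef PPC_FEATURE_POWER6_EXT
# define PPC_FEATURE_POWER6_EXT  0x00000200   /* assumed */
#endif
#ifndef PPC_FEATURE_ARCH_2_06
# define PPC_FEATURE_ARCH_2_06   0x00000100   /* assumed */
#endif
#ifndef PPC_FEATURE2_ARCH_2_07
# define PPC_FEATURE2_ARCH_2_07  0x80000000   /* assumed */
#endif

int main (void)
{
  unsigned long hwcap  = getauxval (AT_HWCAP);
  unsigned long hwcap2 = getauxval (AT_HWCAP2);
  printf ("hwcap=%#lx hwcap2=%#lx\n", hwcap, hwcap2);
  printf ("POWER5+ : %d\n", !!(hwcap  & PPC_FEATURE_POWER5_PLUS));
  printf ("POWER6x : %d\n", !!(hwcap  & PPC_FEATURE_POWER6_EXT));
  printf ("ISA 2.06: %d\n", !!(hwcap  & PPC_FEATURE_ARCH_2_06));
  printf ("ISA 2.07: %d\n", !!(hwcap2 & PPC_FEATURE2_ARCH_2_07));
  return 0;
}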
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __trunc __trunc_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_trunc.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc.c new file mode 100644 index 0000000000..54844d5ff2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc.c @@ -0,0 +1,40 @@ +/* Multiple versions of trunc. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__trunc) __trunc_ppc64 attribute_hidden; +extern __typeof (__trunc) __trunc_power5plus attribute_hidden; + +libc_ifunc (__trunc, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __trunc_power5plus + : __trunc_ppc64); + +weak_alias (__trunc, trunc) + +#ifdef NO_LONG_DOUBLE +strong_alias (__trunc, __truncl) +weak_alias (__trunc, truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __trunc, truncl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-power5+.S new file mode 100644 index 0000000000..e28de7cb1e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-power5+.S @@ -0,0 +1,26 @@ +/* truncf function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __truncf __truncf_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-ppc64.S new file mode 100644 index 0000000000..b60242d83b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-ppc64.S @@ -0,0 +1,26 @@ +/* truncf function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __truncf __truncf_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_truncf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf.c new file mode 100644 index 0000000000..2c46525235 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf.c @@ -0,0 +1,32 @@ +/* Multiple versions of truncf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__truncf) __truncf_ppc64 attribute_hidden; +extern __typeof (__truncf) __truncf_power5plus attribute_hidden; + +libc_ifunc (__truncf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __truncf_power5plus + : __truncf_ppc64); + +weak_alias (__truncf, truncf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceil.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceil.S new file mode 100644 index 0000000000..78d7feefed --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceil.S @@ -0,0 +1,72 @@ +/* ceil function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__ceil, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,2 /* Set rounding mode toward +inf. */ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__ceil) + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +weak_alias (__ceil, ceill) +strong_alias (__ceil, __ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S new file mode 100644 index 0000000000..bc5ab02cb0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S @@ -0,0 +1,66 @@ +/* float ceil function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +EALIGN (__ceilf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. 
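+				    For the positive branch the whole scheme
+				    is, as a rough C sketch (illustrative
+				    only, not the literal code):
+				      fesetround (FE_UPWARD);
+				      r = (x + 0x1p23f) - 0x1p23f;
+				    with the saved FPSCR image written back
+				    afterwards so the caller's mode and
+				    flags are preserved.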
*/ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,2 /* Set rounding mode toward +inf. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__ceilf) + +weak_alias (__ceilf, ceilf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysign.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysign.S new file mode 100644 index 0000000000..59472816c7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysign.S @@ -0,0 +1,59 @@ +/* Copy a sign bit between floating-point values. PowerPC64 version. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__copysign) + CALL_MCOUNT 0 +/* double [f1] copysign (double [f1] x, double [f2] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + stfd fp2,-8(r1) + nop + nop + nop + ld r3,-8(r1) + cmpdi r3,0 + blt L(0) + fabs fp1,fp1 + blr +L(0): fnabs fp1,fp1 + blr + END (__copysign) + +weak_alias (__copysign,copysign) + +/* It turns out that it's safe to use this code even for single-precision. */ +weak_alias (__copysign,copysignf) +strong_alias(__copysign,__copysignf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__copysign,copysignl) +strong_alias(__copysign,__copysignl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __copysign, copysignl, GLIBC_2_0) +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __copysign, copysignl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S new file mode 100644 index 0000000000..e05438ae7d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S @@ -0,0 +1 @@ +/* __copysignf is in s_copysign.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignl.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignl.S new file mode 100644 index 0000000000..b33ea6e256 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignl.S @@ -0,0 +1,48 @@ +/* Copy a sign bit between floating-point values. + IBM extended format long double version. 
+   Copyright (C) 2004-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ENTRY(__copysignl)
+/* long double [f1,f2] copysign (long double [f1,f2] x, long double [f3,f4] y);
+   copysign(x,y) returns a value with the magnitude of x and
+   with the sign bit of y.  */
+	stfd	fp3,-16(r1)
+	ld	r3,-16(r1)
+	cmpdi	r3,0
+	blt	L(0)
+	fmr	fp0,fp1
+	fabs	fp1,fp1
+	fneg	fp3,fp2
+	fsel	fp2,fp0,fp2,fp3
+	blr
+L(0):
+	fmr	fp0,fp1
+	fnabs	fp1,fp1
+	fneg	fp3,fp2
+	fsel	fp2,fp0,fp3,fp2
+	blr
+END (__copysignl)
+
+#if IS_IN (libm)
+long_double_symbol (libm, __copysignl, copysignl)
+#else
+long_double_symbol (libc, __copysignl, copysignl)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabs.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabs.S
new file mode 100644
index 0000000000..53d21301ee
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabs.S
@@ -0,0 +1,5 @@
+#include <math_ldbl_opt.h>
+#include <sysdeps/powerpc/fpu/s_fabs.S>
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __fabs, fabsl, GLIBC_2_0)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabsl.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabsl.S
new file mode 100644
index 0000000000..7603abba5d
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabsl.S
@@ -0,0 +1,34 @@
+/* Absolute value of a floating-point value.
+   IBM extended format long double version.
+   Copyright (C) 2004-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ENTRY(__fabsl)
+/* long double [f1,f2] fabsl (long double [f1,f2] x);
+   fabsl(x) returns the magnitude of x; the low double's sign is
+   flipped along with the high double's so the pair stays canonical.  */
+	fmr	fp0,fp1
+	fabs	fp1,fp1
+	fneg	fp3,fp2
+	fsel	fp2,fp0,fp2,fp3
+	blr
+END (__fabsl)
+
+long_double_symbol (libm, __fabsl, fabsl)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floor.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floor.S
new file mode 100644
index 0000000000..4a6cc0ebba
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floor.S
@@ -0,0 +1,72 @@
+/* Floor function.
PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__floor, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,3 /* Set rounding mode toward -inf. */ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__floor) + +weak_alias (__floor, floor) + +#ifdef NO_LONG_DOUBLE +weak_alias (__floor, floorl) +strong_alias (__floor, __floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floorf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floorf.S new file mode 100644 index 0000000000..d8b5e21248 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floorf.S @@ -0,0 +1,66 @@ +/* float Floor function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +EALIGN (__floorf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,3 /* Set rounding mode toward -inf. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__floorf) + +weak_alias (__floorf, floorf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fma.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fma.S new file mode 100644 index 0000000000..d40695c633 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fma.S @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fma.S> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fma, fmal, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_isnan.S new file mode 100644 index 0000000000..6cba2d4408 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_isnan.S @@ -0,0 +1,56 @@ +/* isnan(). PowerPC64 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power4 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + mffs fp0 + mtfsb0 4*cr6+lt /* reset_fpscr_bit (FPSCR_VE) */ + fcmpu cr7,fp1,fp1 + mtfsf 255,fp0 + li r3,0 + beqlr+ cr7 /* (x == x) then not a NAN */ + li r3,1 /* else must be a NAN */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. 
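+   (As a rough C model of why -- illustrative only:
+      int isnanf (float x) { return __isnan ((double) x); }
+   the float-to-double conversion is exact and maps NaNs to NaNs, so
+   testing the widened value gives the same answer.)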
*/ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrint.S new file mode 100644 index 0000000000..39e765434a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrint.S @@ -0,0 +1,47 @@ +/* Round double to long int. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long long int[r3] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT 0 + fctid fp13,fp1 + stfd fp13,-16(r1) + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop + ld r3,-16(r1) + blr + END (__llrint) + +strong_alias (__llrint, __lrint) +weak_alias (__llrint, llrint) +weak_alias (__lrint, lrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S new file mode 100644 index 0000000000..4050be6437 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S @@ -0,0 +1,36 @@ +/* Round double to long int. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* long long int[r3] __llrintf (float x[fp1]) */ +ENTRY (__llrintf) + CALL_MCOUNT 0 + fctid fp13,fp1 + stfd fp13,-16(r1) + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. 
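+		   There is no direct FPR-to-GPR move on these
+		   processors, hence the store and reload through the
+		   stack.  In rough C terms the whole routine is just
+		     long long llrintf (float x) { return (long long) rintf (x); }
+		   (a sketch: fctid does the round-and-convert, honoring
+		   the current rounding mode, in one instruction).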
*/ + nop + ld r3,-16(r1) + blr + END (__llrintf) + +strong_alias (__llrintf, __lrintf) +weak_alias (__llrintf, llrintf) +weak_alias (__lrintf, lrintf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llround.S new file mode 100644 index 0000000000..0803ba1eb3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llround.S @@ -0,0 +1,96 @@ +/* llround function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2^52 */ + .tc FD_43300000_0[TC],0x4330000000000000 +.LC1: /* 0.5 */ + .tc FD_3fe00000_0[TC],0x3fe0000000000000 + .section ".text" + +/* long long [r3] llround (double x [fp1]) + IEEE 1003.1 llround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "round to Nearest" mode. Instead we set + "round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. + + It is necessary to detect when x is (+-)0x1.fffffffffffffp-2 + because adding +-0.5 in this case will cause an erroneous shift, + carry and round. We simply return 0 if 0.5 > x > -0.5. Likewise + if x is and odd number between +-(2^52 and 2^53-1) a shift and + carry will erroneously round if biased with +-0.5. Therefore if x + is greater/less than +-2^52 we don't need to bias the number with + +-0.5. */ + +ENTRY (__llround) + CALL_MCOUNT 0 + lfd fp9,.LC0@toc(2) /* Load 2^52 into fpr9. */ + lfd fp10,.LC1@toc(2)/* Load 0.5 into fpr10. */ + fabs fp2,fp1 /* Get the absolute value of x. */ + fsub fp12,fp10,fp10 /* Compute 0.0 into fp12. */ + fcmpu cr6,fp2,fp10 /* if |x| < 0.5 */ + fcmpu cr7,fp2,fp9 /* if |x| >= 2^52 */ + fcmpu cr1,fp1,fp12 /* x is negative? x < 0.0 */ + blt- cr6,.Lretzero /* 0.5 > x < -0.5 so just return 0. */ + bge- cr7,.Lnobias /* 2^52 > x < -2^52 just convert with no bias. */ + /* Test whether an integer to avoid spurious "inexact". */ + fadd fp3,fp2,fp9 + fsub fp3,fp3,fp9 + fcmpu cr5,fp2,fp3 + beq cr5,.Lnobias + fadd fp3,fp2,fp10 /* |x|+=0.5 bias to prepare to round. */ + bge cr1,.Lconvert /* x is positive so don't negate x. */ + fnabs fp3,fp3 /* -(|x|+=0.5) */ +.Lconvert: + fctidz fp4,fp3 /* Convert to Integer double word round toward 0. */ + stfd fp4,-16(r1) + nop + nop + nop + ld r3,-16(r1) /* Load return as integer. */ +.Lout: + blr +.Lretzero: /* 0.5 > x > -0.5 */ + li r3,0 /* return 0. 
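+			(The overall algorithm, as a rough C sketch,
+			illustrative only, assuming a truncating final
+			conversion like fctidz:
+			  double ax = fabs (x);
+			  if (ax < 0.5) return 0;
+			  if (ax < 0x1p52 && (ax + 0x1p52) - 0x1p52 != ax)
+			    ax += 0.5;
+			  return (long long) (x < 0.0 ? -ax : ax);)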
*/ + b .Lout +.Lnobias: + fmr fp3,fp1 + b .Lconvert + END (__llround) + +strong_alias (__llround, __lround) +weak_alias (__llround, llround) +weak_alias (__lround, lround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S new file mode 100644 index 0000000000..3e910ac322 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S @@ -0,0 +1,88 @@ +/* llroundf function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" +.LC0: /* 2^23 */ + .tc FD_41600000_0[TC],0x4160000000000000 +.LC1: /* 0.5 */ + .tc FD_3fe00000_0[TC],0x3fe0000000000000 +.LC2: /* 2^52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +/* long long [r3] llroundf (float x [fp1]) + IEEE 1003.1 llroundf function. IEEE specifies "roundf to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "roundf to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "round to Nearest" mode. Instead we set + "round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. + + It is necessary to detect when x is (+-)0x1.fffffffffffffp-2 + because adding +-0.5 in this case will cause an erroneous shift, + carry and round. We simply return 0 if 0.5 > x > -0.5. Likewise + if x is and odd number between +-(2^23 and 2^24-1) a shift and + carry will erroneously round if biased with +-0.5. Therefore if x + is greater/less than +-2^23 we don't need to bias the number with + +-0.5. */ + +ENTRY (__llroundf) + CALL_MCOUNT 0 + lfd fp9,.LC0@toc(2) /* Load 2^23 into fpr9. */ + lfd fp10,.LC1@toc(2)/* Load 0.5 into fpr10. */ + lfd fp11,.LC2@toc(2) /* Load 2^52 into fpr11. */ + fabs fp2,fp1 /* Get the absolute value of x. */ + fsub fp12,fp10,fp10 /* Compute 0.0 into fp12. */ + fcmpu cr6,fp2,fp10 /* if |x| < 0.5 */ + fcmpu cr7,fp2,fp9 /* if |x| >= 2^23 */ + fcmpu cr1,fp1,fp12 /* x is negative? x < 0.0 */ + blt- cr6,.Lretzero /* 0.5 > x < -0.5 so just return 0. */ + bge- cr7,.Lnobias /* 2^23 > x < -2^23 just convert with no bias. */ + /* Test whether an integer to avoid spurious "inexact". */ + fadd fp3,fp2,fp11 + fsub fp3,fp3,fp11 + fcmpu cr5,fp2,fp3 + beq cr5,.Lnobias + fadd fp3,fp2,fp10 /* |x|+=0.5 bias to prepare to round. 
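+			Note that the integrality test above adds 2^52
+			rather than 2^23: fadd works in double format, in
+			which the register holds the value, so only the
+			2^52 constant forces rounding at the integer
+			boundary there.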
*/ + bge cr1,.Lconvert /* x is positive so don't negate x. */ + fnabs fp3,fp3 /* -(|x|+=0.5) */ +.Lconvert: + fctidz fp4,fp3 /* Convert to Integer double word round toward 0. */ + stfd fp4,-16(r1) + nop + nop + nop + ld r3,-16(r1) /* Load return as integer. */ +.Lout: + blr +.Lretzero: /* 0.5 > x > -0.5 */ + li r3,0 /* return 0. */ + b .Lout +.Lnobias: + fmr fp3,fp1 + b .Lconvert + END (__llroundf) + +strong_alias (__llroundf, __lroundf) +weak_alias (__llroundf, llroundf) +weak_alias (__lroundf, lroundf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lrint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lrint.S new file mode 100644 index 0000000000..d3c2fff581 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lrint.S @@ -0,0 +1 @@ +/* __lrint is in s_llrint.c */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lround.S new file mode 100644 index 0000000000..4306c405c4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lround.S @@ -0,0 +1 @@ +/* __lround is in s_llround.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S new file mode 100644 index 0000000000..6b2a4e37a6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S @@ -0,0 +1 @@ +/* __lroundf is in s_llroundf.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S new file mode 100644 index 0000000000..3dcd04b1f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S @@ -0,0 +1,75 @@ +/* Round to int floating-point values. PowerPC64 version. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + +/* double [fp1] nearbyint(double [fp1] x) */ + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__nearbyint, 4, 0) + CALL_MCOUNT 0 + fabs fp0,fp1 + lfd fp13,.LC0@toc(2) + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + bge cr7,.L10 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp1,fp12 /* if (x > 0.0) */ + ble cr7, L(lessthanzero) + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fadd fp1,fp1,fp13 /* x+= TWO52 */ + fsub fp1,fp1,fp13 /* x-= TWO52 */ + fabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. 
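+				   nearbyint is rint without the "inexact"
+				   flag; the mffs/mtfsb0/mtfsf dance is, as
+				   a rough C sketch (illustrative only):
+				     feholdexcept (&env);
+				     r = (x + 0x1p52) - 0x1p52;
+				     fesetenv (&env);
+				   so the flags raised by the adds are
+				   discarded on restore.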
*/
+	blr			/* x = 0.0;  */
+L(lessthanzero):
+	bgelr	cr7		/* if (x < 0.0)  */
+	mffs	fp11
+	mtfsb0	4*cr7+lt
+	fsub	fp1,fp1,fp13	/* x -= TWO52  */
+	fadd	fp1,fp1,fp13	/* x += TWO52  */
+	fnabs	fp1,fp1		/* if (x == 0.0)  */
+	mtfsf	0xff,fp11	/* Restore FE_INEXACT state.  */
+	blr			/* x = -0.0;  */
+.L10:
+	/* Ensure sNaN input is converted to qNaN.  */
+	fcmpu	cr7,fp1,fp1
+	beqlr	cr7
+	fadd	fp1,fp1,fp1
+	blr
+END (__nearbyint)
+
+weak_alias (__nearbyint, nearbyint)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__nearbyint, nearbyintl)
+strong_alias (__nearbyint, __nearbyintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_1)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
new file mode 100644
index 0000000000..11be35f94e
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
@@ -0,0 +1,68 @@
+/* Round to int floating-point values.  PowerPC64 version.
+   Copyright (C) 2011-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This has been coded in assembler because GCC makes such a mess of it
+   when it's coded in C.  */
+
+#include <sysdep.h>
+
+
+/* float [fp1] nearbyintf(float [fp1]) */
+
+	.section	".toc","aw"
+	.p2align 3
+.LC0:	/* 2**23 */
+	.long 0x4b000000
+	.long 0x0
+	.section	".text"
+
+EALIGN (__nearbyintf, 4, 0)
+	CALL_MCOUNT 0
+	fabs	fp0,fp1
+	lfs	fp13,.LC0@toc(2)
+	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO23)  */
+	bge	cr7,.L10
+	fsubs	fp12,fp13,fp13	/* generate 0.0  */
+	fcmpu	cr7,fp1,fp12	/* if (x > 0.0)  */
+	ble	cr7, L(lessthanzero)
+	mffs	fp11
+	mtfsb0	4*cr7+lt	/* Disable FE_INEXACT exception */
+	fadds	fp1,fp1,fp13	/* x += TWO23 */
+	fsubs	fp1,fp1,fp13	/* x -= TWO23 */
+	fabs	fp1,fp1		/* if (x == 0.0)  */
+	mtfsf	0xff,fp11	/* Restore FE_INEXACT state.  */
+	blr			/* x = 0.0;  */
+L(lessthanzero):
+	bgelr	cr7		/* if (x < 0.0)  */
+	mffs	fp11
+	mtfsb0	4*cr7+lt	/* Disable FE_INEXACT exception */
+	fsubs	fp1,fp1,fp13	/* x -= TWO23 */
+	fadds	fp1,fp1,fp13	/* x += TWO23 */
+	fnabs	fp1,fp1		/* if (x == 0.0)  */
+	mtfsf	0xff,fp11	/* Restore FE_INEXACT state.  */
+	blr			/* x = -0.0;  */
+.L10:
+	/* Ensure sNaN input is converted to qNaN.  */
+	fcmpu	cr7,fp1,fp1
+	beqlr	cr7
+	fadds	fp1,fp1,fp1
+	blr
+END (__nearbyintf)
+
+weak_alias (__nearbyintf, nearbyintf)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rint.S
new file mode 100644
index 0000000000..7ba0adff84
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rint.S
@@ -0,0 +1,65 @@
+/* Round to int floating-point values.  PowerPC64 version.
+   Copyright (C) 2004-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__rint, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl cr7,.L10 + bng cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = 0.0; */ +.L4: + bnllr cr6 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__rint) + +weak_alias (__rint, rint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__rint, rintl) +strong_alias (__rint, __rintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __rint, rintl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rintf.S new file mode 100644 index 0000000000..b1d1e158c0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rintf.S @@ -0,0 +1,56 @@ +/* Round float to int floating-point values. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +EALIGN (__rintf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl cr7,.L10 + bng cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = 0.0; */ +.L4: + bnllr cr6 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__rintf) + +weak_alias (__rintf, rintf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_round.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_round.S new file mode 100644 index 0000000000..fe315af51d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_round.S @@ -0,0 +1,87 @@ +/* round function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 +.LC1: /* 0.5 */ + .tc FD_3fe00000_0[TC],0x3fe0000000000000 + .section ".text" + +/* double [fp1] round (double x [fp1]) + IEEE 1003.1 round function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "Round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "Round to Nearest" mode. Instead we set + "Round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. */ + +EALIGN (__round, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding mode toward 0. */ + lfd fp10,.LC1@toc(2) + ble- cr6,.L4 + fadd fp1,fp1,fp10 /* x+= 0.5; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. 
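+			     (Concretely: round (0.5) must return 1.0, but
+			     round-to-nearest-even would give 0.0 -- hence
+			     the explicit +-0.5 bias under a fixed rounding
+			     mode.)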
*/ + blr +.L4: + fsub fp9,fp1,fp10 /* x+= 0.5; */ + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp9,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__round) + +weak_alias (__round, round) + +#ifdef NO_LONG_DOUBLE +weak_alias (__round, roundl) +strong_alias (__round, __roundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __round, roundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_roundf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_roundf.S new file mode 100644 index 0000000000..d213f43566 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_roundf.S @@ -0,0 +1,81 @@ +/* roundf function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 +.LC1: /* 0.5 */ + .long 0x3f000000 + + .section ".text" + +/* float [fp1] roundf (float x [fp1]) + IEEE 1003.1 round function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "Round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "Round to Nearest" mode. Instead we set + "Round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. */ + +EALIGN (__roundf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding mode toward 0. */ + lfs fp10,.LC1@toc(2) + ble- cr6,.L4 + fadds fp1,fp1,fp10 /* x+= 0.5; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + fsubs fp9,fp1,fp10 /* x+= 0.5; */ + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp9,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. 
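+      Any arithmetic operation on a signaling NaN raises "invalid" and
+      produces the corresponding quiet NaN, so this tail is simply
+      "return x + x;" in C terms.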
*/ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__roundf) + +weak_alias (__roundf, roundf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_trunc.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_trunc.S new file mode 100644 index 0000000000..890eb21c54 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_trunc.S @@ -0,0 +1,79 @@ +/* trunc function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +/* double [fp1] trunc (double x [fp1]) + IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer + value, in floating format, nearest to but no larger in magnitude + then the argument." + We set "round toward Zero" mode and trunc by adding +-2**52 then + subtracting +-2**52. */ + +EALIGN (__trunc, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding toward 0 mode. */ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__trunc) + +weak_alias (__trunc, trunc) + +#ifdef NO_LONG_DOUBLE +weak_alias (__trunc, truncl) +strong_alias (__trunc, __truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_truncf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_truncf.S new file mode 100644 index 0000000000..cfcff80bf7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_truncf.S @@ -0,0 +1,73 @@ +/* truncf function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +/* float [fp1] truncf (float x [fp1]) + IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer + value, in floating format, nearest to but no larger in magnitude + then the argument." + We set "round toward Zero" mode and trunc by adding +-2**23 then + subtracting +-2**23. */ + +EALIGN (__truncf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding toward 0 mode. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__truncf) + +weak_alias (__truncf, truncf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/hp-timing.h b/REORG.TODO/sysdeps/powerpc/powerpc64/hp-timing.h new file mode 100644 index 0000000000..33064c6781 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/hp-timing.h @@ -0,0 +1,46 @@ +/* High precision, low overhead timing functions. powerpc64 version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _HP_TIMING_H +#define _HP_TIMING_H 1 + +/* We always assume having the timestamp register. */ +#define HP_TIMING_AVAIL (1) +#define HP_SMALL_TIMING_AVAIL (1) + +/* We indeed have inlined functions. */ +#define HP_TIMING_INLINE (1) + +/* We use 64bit values for the times. */ +typedef unsigned long long int hp_timing_t; + +/* That's quite simple. Use the `mftb' instruction. Note that the value + might not be 100% accurate since there might be some more instructions + running in this moment. 
This could be changed by using a barrier like
+   'lwsync' right before the `mftb' instruction.  But we are not
+   interested in accurate clock cycles here so we don't do this.  */
+#ifdef _ARCH_PWR4
+#define HP_TIMING_NOW(Var)	__asm__ __volatile__ ("mfspr %0,268" : "=r" (Var))
+#else
+#define HP_TIMING_NOW(Var)	__asm__ __volatile__ ("mftb %0" : "=r" (Var))
+#endif
+
+#include <hp-timing-common.h>
+
+#endif	/* hp-timing.h */
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/lshift.S b/REORG.TODO/sysdeps/powerpc/powerpc64/lshift.S
new file mode 100644
index 0000000000..480e38688b
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/lshift.S
@@ -0,0 +1,177 @@
+/* PowerPC64 mpn_lshift -- rp[] = up[] << cnt
+   Copyright (C) 2003-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#define RP       r3
+#define UP       r4
+#define N        r5
+#define CNT      r6
+
+#define TNC      r0
+#define U0       r30
+#define U1       r31
+#define RETVAL   r5
+
+EALIGN(__mpn_lshift, 5, 0)
+	std	U1, -8(r1)
+	std	U0, -16(r1)
+	subfic	TNC, CNT, 64
+	sldi	r7, N, 3
+	add	UP, UP, r7
+	add	RP, RP, r7
+	rldicl.	U0, N, 0, 62
+	cmpdi	cr6, U0, 2
+	addi	U1, N, 3
+	ld	r10, -8(UP)
+	srd	RETVAL, r10, TNC
+
+	srdi	U1, U1, 2
+	mtctr	U1
+	beq	cr0, L(b00)
+	blt	cr6, L(b01)
+	ld	r11, -16(UP)
+	beq	cr6, L(b10)
+
+	.align	4
+L(b11):	sld	r8, r10, CNT
+	srd	r9, r11, TNC
+	ld	U1, -24(UP)
+	addi	UP, UP, -24
+	sld	r12, r11, CNT
+	srd	r7, U1, TNC
+	addi	RP, RP, 16
+	bdnz	L(gt3)
+
+	or	r11, r8, r9
+	sld	r8, U1, CNT
+	b	L(cj3)
+
+	.align	4
+L(gt3):	ld	U0, -8(UP)
+	or	r11, r8, r9
+	sld	r8, U1, CNT
+	srd	r9, U0, TNC
+	ld	U1, -16(UP)
+	or	r10, r12, r7
+	b	L(L11)
+
+	.align	5
+L(b10):	sld	r12, r10, CNT
+	addi	RP, RP, 24
+	srd	r7, r11, TNC
+	bdnz	L(gt2)
+
+	sld	r8, r11, CNT
+	or	r10, r12, r7
+	b	L(cj2)
+
+L(gt2):	ld	U0, -24(UP)
+	sld	r8, r11, CNT
+	srd	r9, U0, TNC
+	ld	U1, -32(UP)
+	or	r10, r12, r7
+	sld	r12, U0, CNT
+	srd	r7, U1, TNC
+	ld	U0, -40(UP)
+	or	r11, r8, r9
+	addi	UP, UP, -16
+	b	L(L10)
+
+	.align	4
+L(b00):	ld	U1, -16(UP)
+	sld	r12, r10, CNT
+	srd	r7, U1, TNC
+	ld	U0, -24(UP)
+	sld	r8, U1, CNT
+	srd	r9, U0, TNC
+	ld	U1, -32(UP)
+	or	r10, r12, r7
+	sld	r12, U0, CNT
+	srd	r7, U1, TNC
+	addi	RP, RP, 8
+	bdz	L(cj4)
+
+L(gt4):	addi	UP, UP, -32
+	ld	U0, -8(UP)
+	or	r11, r8, r9
+	b	L(L00)
+
+	.align	4
+L(b01):	bdnz	L(gt1)
+	sld	r8, r10, CNT
+	std	r8, -8(RP)
+	b	L(ret)
+
+L(gt1):	ld	U0, -16(UP)
+	sld	r8, r10, CNT
+	srd	r9, U0, TNC
+	ld	U1, -24(UP)
+	sld	r12, U0, CNT
+	srd	r7, U1, TNC
+	ld	U0, -32(UP)
+	or	r11, r8, r9
+	sld	r8, U1, CNT
+	srd	r9, U0, TNC
+	ld	U1, -40(UP)
+	addi	UP, UP, -40
+	or	r10, r12, r7
+	bdz	L(end)
+
+	.align	5
+L(top):	sld	r12, U0, CNT
+	srd	r7, U1, TNC
+	ld	U0, -8(UP)
+	std	r11, -8(RP)
+	or	r11, r8, r9
+L(L00):	sld	r8, U1, CNT
+	srd	r9, U0, TNC
+	ld	U1, -16(UP)
+	std	r10, -16(RP)
+	or	r10, r12, r7
+L(L11):	sld	r12, U0, CNT
+	srd	r7, U1, TNC
+	ld	U0, -24(UP)
+	std	r11, -24(RP)
+	or	r11, r8, r9
+L(L10):	sld	r8, U1, CNT
+	srd	r9, U0, TNC
+	ld	U1, -32(UP)
+	addi	UP, UP, -32
+	std	r10, -32(RP)
+	addi	RP, RP, -32
+	or	r10, r12, r7
+	bdnz	L(top)
+
+	.align	5
+L(end):	sld	r12, U0, CNT
+	srd	r7, U1, TNC
+	std	r11, -8(RP)
+L(cj4):	or	r11, r8, r9
+	sld	r8, U1, CNT
+	std	r10, -16(RP)
+L(cj3):	or	r10, r12, r7
+	std	r11, -24(RP)
+L(cj2):	std	r10, -32(RP)
+	std	r8, -40(RP)
+
+L(ret):	ld	U1, -8(r1)
+	ld	U0, -16(r1)
+	mr	RP, RETVAL
+	blr
+END(__mpn_lshift)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/memcpy.S
new file mode 100644
index 0000000000..a4c82c31ef
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/memcpy.S
@@ -0,0 +1,397 @@
+/* Optimized memcpy implementation for PowerPC64.
+   Copyright (C) 2003-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+   Returns 'dst'.
+ + Memcpy handles short copies (< 32-bytes) using a binary move blocks + (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled + with the appropriate combination of byte and halfword load/stores. + There is minimal effort to optimize the alignment of short moves. + The 64-bit implementations of POWER3 and POWER4 do a reasonable job + of handling unaligned load/stores that do not cross 32-byte boundaries. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination doubleword (8-byte) aligned. Further optimization is + possible when both source and destination are doubleword aligned. + Each case has a optimized unrolled loop. */ + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + cmpldi cr1,5,31 + neg 0,3 + std 3,-16(1) + std 31,-8(1) + cfi_offset(31,-8) + andi. 11,3,7 /* check alignment of dst. */ + clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ + clrldi 10,4,61 /* check alignment of src. */ + cmpldi cr6,5,8 + ble- cr1,.L2 /* If move < 32 bytes use short move code. */ + cmpld cr6,10,11 + mr 12,4 + srdi 9,5,3 /* Number of full double words remaining. */ + mtcrf 0x01,0 + mr 31,5 + beq .L0 + + subf 31,0,5 + /* Move 0-7 bytes as needed to get the destination doubleword aligned. */ +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,4f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: bf 29,0f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +0: + clrldi 10,12,61 /* check alignment of src again. */ + srdi 9,31,3 /* Number of full double words remaining. */ + + /* Copy doublewords from source to destination, assuming the + destination is aligned on a doubleword boundary. + + At this point we know there are at least 25 bytes left (32-7) to copy. + The next step is to determine if the source is also doubleword aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. + + Otherwise source and destination are doubleword aligned, and we can + the optimized doubleword copy loop. */ +.L0: + clrldi 11,31,61 + mtcrf 0x01,9 + bne- cr6,.L6 /* If source is not DW aligned. */ + + /* Move doublewords where destination and source are DW aligned. + Use a unrolled loop to copy 4 doubleword (32-bytes) per iteration. + If the copy is not an exact multiple of 32 bytes, 1-3 + doublewords are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-7 bytes. These byte are + copied a word/halfword/byte at a time as needed to preserve alignment. */ + + srdi 8,31,5 + cmpldi cr1,9,4 + cmpldi cr6,11,0 + mr 11,12 + + bf 30,1f + ld 6,0(12) + ld 7,8(12) + addi 11,12,16 + mtctr 8 + std 6,0(3) + std 7,8(3) + addi 10,3,16 + bf 31,4f + ld 0,16(12) + std 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 + b 4f + .align 4 +1: + mr 10,3 + mtctr 8 + bf 31,4f + ld 6,0(12) + addi 11,12,8 + std 6,0(3) + addi 10,3,8 + + .align 4 +4: + ld 6,0(11) + ld 7,8(11) + ld 8,16(11) + ld 0,24(11) + addi 11,11,32 +2: + std 6,0(10) + std 7,8(10) + std 8,16(10) + std 0,24(10) + addi 10,10,32 + bdnz 4b +3: + + rldicr 0,31,0,60 + mtcrf 0x01,31 + beq cr6,0f +.L9: + add 3,3,0 + add 12,12,0 + +/* At this point we have a tail of 0-7 bytes and we know that the + destination is double word aligned. */ +4: bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: bf 30,1f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. 
*/ + ld 31,-8(1) + ld 3,-16(1) + blr + +/* Copy up to 31 bytes. This is divided into two cases: 0-8 bytes and 9-31 + bytes. Each case is handled without loops, using binary (1,2,4,8) + tests. + + In the short (0-8 byte) case no attempt is made to force alignment + of either source or destination. The hardware will handle the + unaligned load/stores with small delays for crossing 32-, 64-, and + 4096-byte boundaries. Since these short moves are unlikely to be + unaligned or cross these boundaries, the overhead to force + alignment is not justified. + + The longer (9-31 byte) move is more likely to cross 32- or 64-byte + boundaries. Since only loads are sensitive to the 32-/64-byte + boundaries it is more important to align the source than the + destination. If the source is not already word aligned, we first + move 1-3 bytes as needed. Since we are only word aligned we don't + use double word load/stores, to ensure that all loads are aligned. + While the destination and stores may still be unaligned, this + is only an issue for page (4096 byte boundary) crossing, which + should be rare for these short moves. The hardware handles this + case automatically with a small delay. */ + + .align 4 +.L2: + mtcrf 0x01,5 + neg 8,4 + clrrdi 11,4,2 + andi. 0,8,3 + ble cr6,.LE8 /* Handle moves of 0-8 bytes. */ +/* At least 9 bytes left. Get the source word aligned. */ + cmpldi cr1,5,16 + mr 10,5 + mr 12,4 + cmpldi cr6,0,2 + beq .L3 /* If the source is already word aligned skip this. */ +/* Copy 1-3 bytes to get source address word aligned. */ + lwz 6,0(11) + subf 10,0,5 + add 12,4,0 + blt cr6,5f + srdi 7,6,16 + bgt cr6,3f +#ifdef __LITTLE_ENDIAN__ + sth 7,0(3) +#else + sth 6,0(3) +#endif + b 7f + .align 4 +3: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,24 + stb 6,0(3) + sth 7,1(3) +#else + stb 7,0(3) + sth 6,1(3) +#endif + b 7f + .align 4 +5: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,8 +#endif + stb 6,0(3) +7: + cmpldi cr1,10,16 + add 3,3,0 + mtcrf 0x01,10 + .align 4 +.L3: +/* At least 6 bytes left and the source is word aligned. */ + blt cr1,8f +16: /* Move 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 6,8(12) + stw 7,4(3) + lwz 7,12(12) + addi 12,12,16 + stw 6,8(3) + stw 7,12(3) + addi 3,3,16 +8: /* Move 8 bytes. */ + bf 28,4f + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Move 4 bytes. */ + bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Move 2-3 bytes. */ + bf 30,1f + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + ld 3,-16(1) + blr +1: /* Move 1 byte. */ + bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 3,-16(1) + blr + +/* Special case to copy 0-8 bytes. */ + .align 4 +.LE8: + mr 12,4 + bne cr6,4f +/* We would have liked to use ld/std here but the 630 processors are + slow for load/store doubles that are not at least word aligned. + Unaligned load/store word executes with only a 1 cycle penalty. */ + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + /* Return original dst pointer. */ + ld 3,-16(1) + blr + .align 4 +4: bf 29,2b + lwz 6,0(4) + stw 6,0(3) +6: + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + ld 3,-16(1) + blr + .align 4 +5: + bf 31,0f + lbz 6,4(4) + stb 6,4(3) + .align 4 +0: + /* Return original dst pointer. */ + ld 3,-16(1) + blr + + .align 4 +.L6: + + /* Copy doublewords where the destination is aligned but the source is + not.
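+     In C terms, with the source sh bits past a doubleword boundary,
+     each store combines two aligned loads (an illustrative sketch, not
+     the scheduled code below):
+
+	dst[i] = (src[i] << sh) | (src[i + 1] >> (64 - sh));
+
+     on big-endian; the little-endian path swaps the shift directions.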
Use aligned doubleword loads from the source, shifted to realign + the data, to allow aligned destination stores. */ + subf 5,10,12 + andi. 0,9,1 + cmpldi cr6,11,0 + sldi 10,10,3 + mr 11,9 + mr 4,3 + ld 6,0(5) + ld 7,8(5) + subfic 9,10,64 + beq 2f +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 +#else + sld 0,6,10 +#endif + cmpldi 11,1 + mr 6,7 + addi 4,4,-8 + addi 11,11,-1 + b 1f +2: addi 5,5,8 + .align 4 +#ifdef __LITTLE_ENDIAN__ +0: srd 0,6,10 + sld 8,7,9 +#else +0: sld 0,6,10 + srd 8,7,9 +#endif + cmpldi 11,2 + ld 6,8(5) + or 0,0,8 + addi 11,11,-2 + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srd 0,7,10 +1: sld 8,6,9 +#else + sld 0,7,10 +1: srd 8,6,9 +#endif + or 0,0,8 + beq 8f + ld 7,16(5) + std 0,8(4) + addi 5,5,16 + addi 4,4,16 + b 0b + .align 4 +8: + std 0,8(4) + rldicr 0,31,0,60 + mtcrf 0x01,31 + bne cr6,.L9 /* If the tail is 0 bytes we are done! */ + /* Return original dst pointer. */ + ld 31,-8(1) + ld 3,-16(1) + blr +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/memset.S new file mode 100644 index 0000000000..f6581b50f7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/memset.S @@ -0,0 +1,265 @@ +/* Optimized memset implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" +.LC0: + .tc __cache_line_size[TC],__cache_line_size + .section ".text" + .align 2 + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]); + Returns 's'. + + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (256 bits). There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + +EALIGN (MEMSET, 5, 0) + CALL_MCOUNT 3 + +#define rTMP r0 +#define rRTN r3 /* Initial value of 1st argument. */ +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ +#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ +#define rMEMP2 r8 + +#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */ +#define rCLS r8 /* Cache line size obtained from static. */ +#define rCLM r9 /* Cache line size mask to check for cache alignment. */ +L(_memset): +/* Take care of the case for size <= 8. */ + cmpldi cr1, rLEN, 8 + andi. rALIGN, rMEMP0, 7 + mr rMEMP, rMEMP0 + ble- cr1, L(small) + +/* Align to doubleword boundary. */ + cmpldi cr5, rLEN, 31 + insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + beq+ L(aligned2) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 8 + cror 28,30,31 /* Detect odd word aligned.
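+			     (cr bit 28 := bit 30 | bit 31, i.e. set
+			     when the start is not word aligned.)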
*/ + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + bt 29, L(g4) +/* Process the even word of doubleword. */ + bf+ 31, L(g2) + stb rCHR, 0(rMEMP0) + bt 30, L(g4x) +L(g2): + sth rCHR, -6(rMEMP) +L(g4x): + stw rCHR, -4(rMEMP) + b L(aligned) +/* Process the odd word of doubleword. */ +L(g4): + bf 28, L(g4x) /* If false, word aligned on odd word. */ + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): + sth rCHR, -2(rMEMP) + +/* Handle the case of size < 31. */ +L(aligned2): + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ +L(aligned): + mtcrf 0x01, rLEN + ble cr5, L(medium) +/* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x18 + subfic rALIGN, rALIGN, 0x20 + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stdu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + std rCHR, -8(rMEMP2) + stdu rCHR, -16(rMEMP2) +L(a2): + +/* Now aligned to a 32 byte boundary. */ +L(caligned): + cmpldi cr1, rCHR, 0 + clrrdi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN + beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */ +L(nondcbz): + srdi rTMP, rALIGN, 5 + mtctr rTMP + beq L(medium) /* We may not actually get to do a full line. */ + clrldi. rLEN, rLEN, 59 + add rMEMP, rMEMP, rALIGN + li rNEG64, -0x40 + bdz L(cloopdone) + +L(c3): dcbtst rNEG64, rMEMP + std rCHR, -8(rMEMP) + std rCHR, -16(rMEMP) + std rCHR, -24(rMEMP) + stdu rCHR, -32(rMEMP) + bdnz L(c3) +L(cloopdone): + std rCHR, -8(rMEMP) + std rCHR, -16(rMEMP) + cmpldi cr1, rLEN, 16 + std rCHR, -24(rMEMP) + stdu rCHR, -32(rMEMP) + beqlr + add rMEMP, rMEMP, rALIGN + b L(medium_tail2) + + .align 5 +/* Clear lines of memory in 128-byte chunks. */ +L(zloopstart): +/* If the remaining length is less than 32 bytes, don't bother getting + the cache line size. */ + beq L(medium) + ld rCLS,.LC0@toc(r2) + lwz rCLS,0(rCLS) +/* If the cache line size was not set just go to L(nondcbz) which is + safe for any cache line size. */ + cmpldi cr1,rCLS,0 + beq cr1,L(nondcbz) + + +/* Now we know the cache line size, and it is not 32-bytes, but + we may not yet be aligned to the cache line. May have a partial + line to fill, so touch it 1st. */ + dcbt 0,rMEMP + addi rCLM,rCLS,-1 +L(getCacheAligned): + cmpldi cr1,rLEN,32 + and. rTMP,rCLM,rMEMP + blt cr1,L(handletail32) + beq L(cacheAligned) + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + std rCHR,-32(rMEMP) + std rCHR,-24(rMEMP) + std rCHR,-16(rMEMP) + std rCHR,-8(rMEMP) + b L(getCacheAligned) + +/* Now we are aligned to the cache line and can use dcbz. */ +L(cacheAligned): + cmpld cr1,rLEN,rCLS + blt cr1,L(handletail32) + dcbz 0,rMEMP + subf rLEN,rCLS,rLEN + add rMEMP,rMEMP,rCLS + b L(cacheAligned) + +/* We are here because the cache line size was set and was not 32-bytes + and the remainder (rLEN) is less than the actual cache line size. + So set up the preconditions for L(nondcbz) and go there. */ +L(handletail32): + clrrwi. rALIGN, rLEN, 5 + b L(nondcbz) + + .align 5 +L(small): +/* Memset of 8 bytes or less.
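+   Equivalent C, as an illustrative sketch only:
+
+	for (size_t i = 0; i < n; i++)
+	  ((unsigned char *) s)[i] = c;
+
+   done below without a loop, using at most eight byte stores.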
*/ + cmpldi cr6, rLEN, 4 + cmpldi cr5, rLEN, 1 + ble cr6,L(le4) + subi rLEN, rLEN, 4 + stb rCHR,0(rMEMP) + stb rCHR,1(rMEMP) + stb rCHR,2(rMEMP) + stb rCHR,3(rMEMP) + addi rMEMP,rMEMP, 4 + cmpldi cr5, rLEN, 1 +L(le4): + cmpldi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + cmpldi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt- 29, L(medium_29t) +L(medium_29f): + bge- cr1, L(medium_27t) + bflr- 28 + std rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt- cr1, L(medium_27f) +L(medium_27t): + std rCHR, -8(rMEMP) + stdu rCHR, -16(rMEMP) +L(medium_27f): + bflr- 28 +L(medium_28t): + std rCHR, -8(rMEMP) + blr +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +#ifndef NO_BZERO_IMPL +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. */ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END_GEN_TB (__bzero,TB_TOCLESS) + +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/mul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc64/mul_1.S new file mode 100644 index 0000000000..68e85cbdc8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/mul_1.S @@ -0,0 +1,135 @@ +/* PowerPC64 __mpn_mul_1 -- Multiply a limb vector with a limb and store + the result in a second limb vector. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define RP r3 +#define UP r4 +#define N r5 +#define VL r6 + +EALIGN(__mpn_mul_1, 5, 0) + std r27, -40(r1) + std r26, -48(r1) + li r12, 0 + ld r26, 0(UP) + + rldicl. 
r0, N, 0, 62 + cmpdi cr6, r0, 2 + addic N, N, 3 + srdi N, N, 2 + mtctr N + beq cr0, L(b00) + blt cr6, L(b01) + beq cr6, L(b10) + +L(b11): mr r7, r12 + mulld r0, r26, VL + mulhdu r12, r26, VL + addi UP, UP, 8 + addc r0, r0, r7 + std r0, 0(RP) + addi RP, RP, 8 + b L(fic) + +L(b00): ld r27, 8(UP) + addi UP, UP, 16 + mulld r0, r26, VL + mulhdu N, r26, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + addc r0, r0, r12 + adde r7, r7, N + addze r12, r8 + std r0, 0(RP) + std r7, 8(RP) + addi RP, RP, 16 + b L(fic) + + nop +L(b01): bdnz L(gt1) + mulld r0, r26, VL + mulhdu r8, r26, VL + addc r0, r0, r12 + std r0, 0(RP) + b L(ret) +L(gt1): ld r27, 8(UP) + nop + mulld r0, r26, VL + mulhdu N, r26, VL + ld r26, 16(UP) + mulld r7, r27, VL + mulhdu r8, r27, VL + mulld r9, r26, VL + mulhdu r10, r26, VL + addc r0, r0, r12 + adde r7, r7, N + adde r9, r9, r8 + addze r12, r10 + std r0, 0(RP) + std r7, 8(RP) + std r9, 16(RP) + addi UP, UP, 24 + addi RP, RP, 24 + b L(fic) + + nop +L(fic): ld r26, 0(UP) +L(b10): ld r27, 8(UP) + addi UP, UP, 16 + bdz L(end) + +L(top): mulld r0, r26, VL + mulhdu N, r26, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + ld r26, 0(UP) + ld r27, 8(UP) + adde r0, r0, r12 + adde r7, r7, N + mulld r9, r26, VL + mulhdu r10, r26, VL + mulld r11, r27, VL + mulhdu r12, r27, VL + ld r26, 16(UP) + ld r27, 24(UP) + std r0, 0(RP) + adde r9, r9, r8 + std r7, 8(RP) + adde r11, r11, r10 + std r9, 16(RP) + addi UP, UP, 32 + std r11, 24(RP) + + addi RP, RP, 32 + bdnz L(top) + +L(end): mulld r0, r26, VL + mulhdu N, r26, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + adde r0, r0, r12 + adde r7, r7, N + std r0, 0(RP) + std r7, 8(RP) +L(ret): addze RP, r8 + ld r27, -40(r1) + ld r26, -48(r1) + blr +END(__mpn_mul_1) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/Makefile new file mode 100644 index 0000000000..5da9052993 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -0,0 +1,47 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + memcpy-power4 memcpy-ppc64 \ + memcmp-power8 memcmp-power7 memcmp-power4 memcmp-ppc64 \ + memset-power7 memset-power6 memset-power4 \ + memset-ppc64 memset-power8 \ + mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \ + memrchr-power7 memrchr-ppc64 rawmemchr-power7 \ + rawmemchr-ppc64 strlen-power7 strlen-ppc64 \ + strnlen-power8 strnlen-power7 strnlen-ppc64 \ + strcasecmp-power7 strcasecmp_l-power7 \ + strncase-power7 strncase_l-power7 \ + strncmp-power9 strncmp-power8 strncmp-power7 \ + strncmp-power4 strncmp-ppc64 \ + strchr-power8 strchr-power7 strchr-ppc64 \ + strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \ + strcpy-power8 strcpy-power7 strcpy-ppc64 stpcpy-power8 \ + stpcpy-power7 stpcpy-ppc64 \ + strrchr-power8 strrchr-power7 strrchr-ppc64 \ + strncat-power8 strncat-power7 strncat-ppc64 \ + strncpy-power7 strncpy-ppc64 \ + stpncpy-power8 stpncpy-power7 stpncpy-ppc64 \ + strcmp-power9 strcmp-power8 strcmp-power7 strcmp-ppc64 \ + strcat-power8 strcat-power7 strcat-ppc64 \ + memmove-power7 memmove-ppc64 wordcopy-ppc64 bcopy-ppc64 \ + strncpy-power8 strstr-power7 strstr-ppc64 \ + strspn-power8 strspn-ppc64 strcspn-power8 strcspn-ppc64 \ + strlen-power8 strcasestr-power8 strcasestr-ppc64 \ + strcasecmp-ppc64 strcasecmp-power8 strncase-ppc64 \ + strncase-power8 + +CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops +CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops +endif + +ifeq ($(subdir),wcsmbs) +sysdep_routines
+= wcschr-power7 wcschr-power6 wcschr-ppc64 \ + wcsrchr-power7 wcsrchr-power6 wcsrchr-ppc64 \ + wcscpy-power7 wcscpy-power6 wcscpy-ppc64 + +CFLAGS-wcschr-power7.c += -mcpu=power7 +CFLAGS-wcschr-power6.c += -mcpu=power6 +CFLAGS-wcsrchr-power7.c += -mcpu=power7 +CFLAGS-wcsrchr-power6.c += -mcpu=power6 +CFLAGS-wcscpy-power7.c += -mcpu=power7 +CFLAGS-wcscpy-power6.c += -mcpu=power6 +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy-ppc64.c new file mode 100644 index 0000000000..a8a097a614 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy-ppc64.c @@ -0,0 +1,27 @@ +/* PowerPC64 default bcopy. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (bcopy) __bcopy_ppc attribute_hidden; +extern __typeof (memmove) __memmove_ppc attribute_hidden; + +void __bcopy_ppc (const void *src, void *dest, size_t n) +{ + __memmove_ppc (dest, src, n); +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy.c new file mode 100644 index 0000000000..05d46e2b48 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy.c @@ -0,0 +1,29 @@ +/* PowerPC64 multiarch bcopy. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include "init-arch.h" + +extern __typeof (bcopy) __bcopy_ppc attribute_hidden; +/* The __bcopy_power7 symbol is implemented in memmove-power7.S. */ +extern __typeof (bcopy) __bcopy_power7 attribute_hidden; + +libc_ifunc (bcopy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __bcopy_power7 + : __bcopy_ppc); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bzero.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bzero.c new file mode 100644 index 0000000000..83b224b8d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bzero.c @@ -0,0 +1,43 @@ +/* Multiple versions of bzero. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# include <string.h> +# include <strings.h> +# include "init-arch.h" + +extern __typeof (bzero) __bzero_ppc attribute_hidden; +extern __typeof (bzero) __bzero_power4 attribute_hidden; +extern __typeof (bzero) __bzero_power6 attribute_hidden; +extern __typeof (bzero) __bzero_power7 attribute_hidden; +extern __typeof (bzero) __bzero_power8 attribute_hidden; + +libc_ifunc (__bzero, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __bzero_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __bzero_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __bzero_power6 : + (hwcap & PPC_FEATURE_POWER4) + ? __bzero_power4 + : __bzero_ppc); + +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c new file mode 100644 index 0000000000..eb173f8b05 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -0,0 +1,389 @@ +/* Enumerate available IFUNC implementations of a function. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <ldsodefs.h> +#include <ifunc-impl-list.h> + +/* Maximum number of IFUNC implementations. */ +#define MAX_IFUNC 6 + +size_t +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + size_t max) +{ + assert (max >= MAX_IFUNC); + + size_t i = 0; + + unsigned long int hwcap = GLRO(dl_hwcap); + unsigned long int hwcap2 = GLRO(dl_hwcap2); + + /* hwcap contains only the latest supported ISA; the code checks which + one is set and fills in the previously supported ones.
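+   For example, a kernel that reports only PPC_FEATURE_ARCH_2_06 for a
+   POWER7 implicitly supports 2_05, POWER5_PLUS, POWER5 and POWER4, so
+   the cascade below sets those bits to keep the older implementations
+   selectable.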
*/ + if (hwcap & PPC_FEATURE_ARCH_2_06) + hwcap |= PPC_FEATURE_ARCH_2_05 | PPC_FEATURE_POWER5_PLUS | + PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_ARCH_2_05) + hwcap |= PPC_FEATURE_POWER5_PLUS | PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_POWER5_PLUS) + hwcap |= PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_POWER5) + hwcap |= PPC_FEATURE_POWER4; + +#ifdef SHARED + /* Support sysdeps/powerpc/powerpc64/multiarch/memcpy.c. */ + IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_HAS_VSX, + __memcpy_power7) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_06, + __memcpy_a2) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_05, + __memcpy_power6) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_CELL_BE, + __memcpy_cell) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_POWER4, + __memcpy_power4) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/memmove.c. */ + IFUNC_IMPL (i, name, memmove, + IFUNC_IMPL_ADD (array, i, memmove, hwcap & PPC_FEATURE_HAS_VSX, + __memmove_power7) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/memset.c. */ + IFUNC_IMPL (i, name, memset, + IFUNC_IMPL_ADD (array, i, memset, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __memset_power8) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_HAS_VSX, + __memset_power7) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_05, + __memset_power6) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_POWER4, + __memset_power4) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcpy.c. */ + IFUNC_IMPL (i, name, strcpy, + IFUNC_IMPL_ADD (array, i, strcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcpy_power8) + IFUNC_IMPL_ADD (array, i, strcpy, hwcap & PPC_FEATURE_HAS_VSX, + __strcpy_power7) + IFUNC_IMPL_ADD (array, i, strcpy, 1, + __strcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/stpcpy.c. */ + IFUNC_IMPL (i, name, stpcpy, + IFUNC_IMPL_ADD (array, i, stpcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __stpcpy_power8) + IFUNC_IMPL_ADD (array, i, stpcpy, hwcap & PPC_FEATURE_HAS_VSX, + __stpcpy_power7) + IFUNC_IMPL_ADD (array, i, stpcpy, 1, + __stpcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strlen.c. */ + IFUNC_IMPL (i, name, strlen, + IFUNC_IMPL_ADD (array, i, strlen, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strlen_power8) + IFUNC_IMPL_ADD (array, i, strlen, hwcap & PPC_FEATURE_HAS_VSX, + __strlen_power7) + IFUNC_IMPL_ADD (array, i, strlen, 1, + __strlen_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c. */ + IFUNC_IMPL (i, name, strncmp, + IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00, + __strncmp_power9) + IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncmp_power8) + IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_HAS_VSX, + __strncmp_power7) + IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_POWER4, + __strncmp_power4) + IFUNC_IMPL_ADD (array, i, strncmp, 1, + __strncmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strchr.c. 
*/ + IFUNC_IMPL (i, name, strchr, + IFUNC_IMPL_ADD (array, i, strchr, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strchr_power8) + IFUNC_IMPL_ADD (array, i, strchr, + hwcap & PPC_FEATURE_HAS_VSX, + __strchr_power7) + IFUNC_IMPL_ADD (array, i, strchr, 1, + __strchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strchrnul.c. */ + IFUNC_IMPL (i, name, strchrnul, + IFUNC_IMPL_ADD (array, i, strchrnul, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strchrnul_power8) + IFUNC_IMPL_ADD (array, i, strchrnul, + hwcap & PPC_FEATURE_HAS_VSX, + __strchrnul_power7) + IFUNC_IMPL_ADD (array, i, strchrnul, 1, + __strchrnul_ppc)) +#endif + + /* Support sysdeps/powerpc/powerpc64/multiarch/memcmp.c. */ + IFUNC_IMPL (i, name, memcmp, + IFUNC_IMPL_ADD (array, i, memcmp, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __memcmp_power8) + IFUNC_IMPL_ADD (array, i, memcmp, hwcap & PPC_FEATURE_HAS_VSX, + __memcmp_power7) + IFUNC_IMPL_ADD (array, i, memcmp, hwcap & PPC_FEATURE_POWER4, + __memcmp_power4) + IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/bzero.c. */ + IFUNC_IMPL (i, name, bzero, + IFUNC_IMPL_ADD (array, i, bzero, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __bzero_power8) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_HAS_VSX, + __bzero_power7) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_ARCH_2_05, + __bzero_power6) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_POWER4, + __bzero_power4) + IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/bcopy.c. */ + IFUNC_IMPL (i, name, bcopy, + IFUNC_IMPL_ADD (array, i, bcopy, hwcap & PPC_FEATURE_HAS_VSX, + __bcopy_power7) + IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/mempcpy.c. */ + IFUNC_IMPL (i, name, mempcpy, + IFUNC_IMPL_ADD (array, i, mempcpy, + hwcap & PPC_FEATURE_HAS_VSX, + __mempcpy_power7) + IFUNC_IMPL_ADD (array, i, mempcpy, 1, + __mempcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/memchr.c. */ + IFUNC_IMPL (i, name, memchr, + IFUNC_IMPL_ADD (array, i, memchr, + hwcap & PPC_FEATURE_HAS_VSX, + __memchr_power7) + IFUNC_IMPL_ADD (array, i, memchr, 1, + __memchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/memrchr.c. */ + IFUNC_IMPL (i, name, memrchr, + IFUNC_IMPL_ADD (array, i, memrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __memrchr_power7) + IFUNC_IMPL_ADD (array, i, memrchr, 1, + __memrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c. */ + IFUNC_IMPL (i, name, rawmemchr, + IFUNC_IMPL_ADD (array, i, rawmemchr, + hwcap & PPC_FEATURE_HAS_VSX, + __rawmemchr_power7) + IFUNC_IMPL_ADD (array, i, rawmemchr, 1, + __rawmemchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strnlen.c. */ + IFUNC_IMPL (i, name, strnlen, + IFUNC_IMPL_ADD (array, i, strnlen, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strnlen_power8) + IFUNC_IMPL_ADD (array, i, strnlen, hwcap & PPC_FEATURE_HAS_VSX, + __strnlen_power7) + IFUNC_IMPL_ADD (array, i, strnlen, 1, + __strnlen_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c. */ + IFUNC_IMPL (i, name, strcasecmp, + IFUNC_IMPL_ADD (array, i, strcasecmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcasecmp_power8) + IFUNC_IMPL_ADD (array, i, strcasecmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strcasecmp_power7) + IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c. 
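+     (Unlike strcasecmp above, only a POWER7 variant is registered here
+     besides the default.)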
*/ + IFUNC_IMPL (i, name, strcasecmp_l, + IFUNC_IMPL_ADD (array, i, strcasecmp_l, + hwcap & PPC_FEATURE_HAS_VSX, + __strcasecmp_l_power7) + IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, + __strcasecmp_l_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncase.c. */ + IFUNC_IMPL (i, name, strncasecmp, + IFUNC_IMPL_ADD (array, i, strncasecmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncasecmp_power8) + IFUNC_IMPL_ADD (array, i, strncasecmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strncasecmp_power7) + IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncase_l.c. */ + IFUNC_IMPL (i, name, strncasecmp_l, + IFUNC_IMPL_ADD (array, i, strncasecmp_l, + hwcap & PPC_FEATURE_HAS_VSX, + __strncasecmp_l_power7) + IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, + __strncasecmp_l_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/wcschr.c. */ + IFUNC_IMPL (i, name, wcschr, + IFUNC_IMPL_ADD (array, i, wcschr, + hwcap & PPC_FEATURE_HAS_VSX, + __wcschr_power7) + IFUNC_IMPL_ADD (array, i, wcschr, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcschr_power6) + IFUNC_IMPL_ADD (array, i, wcschr, 1, + __wcschr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/wcsrchr.c. */ + IFUNC_IMPL (i, name, wcsrchr, + IFUNC_IMPL_ADD (array, i, wcsrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __wcsrchr_power7) + IFUNC_IMPL_ADD (array, i, wcsrchr, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcsrchr_power6) + IFUNC_IMPL_ADD (array, i, wcsrchr, 1, + __wcsrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/wcscpy.c. */ + IFUNC_IMPL (i, name, wcscpy, + IFUNC_IMPL_ADD (array, i, wcscpy, + hwcap & PPC_FEATURE_HAS_VSX, + __wcscpy_power7) + IFUNC_IMPL_ADD (array, i, wcscpy, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcscpy_power6) + IFUNC_IMPL_ADD (array, i, wcscpy, 1, + __wcscpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strrchr.c. */ + IFUNC_IMPL (i, name, strrchr, + IFUNC_IMPL_ADD (array, i, strrchr, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strrchr_power8) + IFUNC_IMPL_ADD (array, i, strrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __strrchr_power7) + IFUNC_IMPL_ADD (array, i, strrchr, 1, + __strrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncat.c. */ + IFUNC_IMPL (i, name, strncat, + IFUNC_IMPL_ADD (array, i, strncat, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncat_power8) + IFUNC_IMPL_ADD (array, i, strncat, + hwcap & PPC_FEATURE_HAS_VSX, + __strncat_power7) + IFUNC_IMPL_ADD (array, i, strncat, 1, + __strncat_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncpy.c. */ + IFUNC_IMPL (i, name, strncpy, + IFUNC_IMPL_ADD (array, i, strncpy, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncpy_power8) + IFUNC_IMPL_ADD (array, i, strncpy, + hwcap & PPC_FEATURE_HAS_VSX, + __strncpy_power7) + IFUNC_IMPL_ADD (array, i, strncpy, 1, + __strncpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/stpncpy.c. */ + IFUNC_IMPL (i, name, stpncpy, + IFUNC_IMPL_ADD (array, i, stpncpy, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __stpncpy_power8) + IFUNC_IMPL_ADD (array, i, stpncpy, + hwcap & PPC_FEATURE_HAS_VSX, + __stpncpy_power7) + IFUNC_IMPL_ADD (array, i, stpncpy, 1, + __stpncpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcmp.c.
*/ + IFUNC_IMPL (i, name, strcmp, + IFUNC_IMPL_ADD (array, i, strcmp, + hwcap2 & PPC_FEATURE2_ARCH_3_00, + __strcmp_power9) + IFUNC_IMPL_ADD (array, i, strcmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcmp_power8) + IFUNC_IMPL_ADD (array, i, strcmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strcmp_power7) + IFUNC_IMPL_ADD (array, i, strcmp, 1, + __strcmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcat.c. */ + IFUNC_IMPL (i, name, strcat, + IFUNC_IMPL_ADD (array, i, strcat, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcat_power8) + IFUNC_IMPL_ADD (array, i, strcat, + hwcap & PPC_FEATURE_HAS_VSX, + __strcat_power7) + IFUNC_IMPL_ADD (array, i, strcat, 1, + __strcat_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strspn.c. */ + IFUNC_IMPL (i, name, strspn, + IFUNC_IMPL_ADD (array, i, strspn, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strspn_power8) + IFUNC_IMPL_ADD (array, i, strspn, 1, + __strspn_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcspn.c. */ + IFUNC_IMPL (i, name, strcspn, + IFUNC_IMPL_ADD (array, i, strcspn, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcspn_power8) + IFUNC_IMPL_ADD (array, i, strcspn, 1, + __strcspn_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strstr.c. */ + IFUNC_IMPL (i, name, strstr, + IFUNC_IMPL_ADD (array, i, strstr, + hwcap & PPC_FEATURE_HAS_VSX, + __strstr_power7) + IFUNC_IMPL_ADD (array, i, strstr, 1, + __strstr_ppc)) + + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcasestr.c. */ + IFUNC_IMPL (i, name, strcasestr, + IFUNC_IMPL_ADD (array, i, strcasestr, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcasestr_power8) + IFUNC_IMPL_ADD (array, i, strcasestr, 1, + __strcasestr_ppc)) + + return i; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/init-arch.h b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/init-arch.h new file mode 100644 index 0000000000..dbbe83c67c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/init-arch.h @@ -0,0 +1,18 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-power7.S new file mode 100644 index 0000000000..fedca9c997 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-power7.S @@ -0,0 +1,28 @@ +/* Optimized memchr implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCHR __memchr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power7/memchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-ppc64.c new file mode 100644 index 0000000000..b67631f017 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-ppc64.c @@ -0,0 +1,31 @@ +/* PowerPC64 default implementation of memchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define MEMCHR __memchr_ppc + +#undef weak_alias +#define weak_alias(a, b) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) + +extern __typeof (memchr) __memchr_ppc attribute_hidden; + +#include <string/memchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr.c new file mode 100644 index 0000000000..f6f4babc09 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr.c @@ -0,0 +1,38 @@ +/* Multiple versions of memchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__memchr) __memchr_ppc attribute_hidden; +extern __typeof (__memchr) __memchr_power7 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__memchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__memchr_power7 + : __memchr_ppc); + +weak_alias (__memchr, memchr) +libc_hidden_builtin_def (memchr) +#else +#include <string/memchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power4.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power4.S new file mode 100644 index 0000000000..e38b2a9c44 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power4.S @@ -0,0 +1,28 @@ +/* Optimized memcmp implementation for PowerPC64/POWER4. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCMP __memcmp_power4 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power4/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power7.S new file mode 100644 index 0000000000..a9cc979b92 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power7.S @@ -0,0 +1,28 @@ +/* Optimized memcmp implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCMP __memcmp_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power7/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power8.S new file mode 100644 index 0000000000..b7837035b1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power8.S @@ -0,0 +1,28 @@ +/* Optimized memcmp implementation for PowerPC64/POWER8. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCMP __memcmp_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power8/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-ppc64.c new file mode 100644 index 0000000000..3bd035dc49 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-ppc64.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define MEMCMP __memcmp_ppc +#undef weak_alias +#define weak_alias(name, aliasname) \ + extern __typeof (__memcmp_ppc) aliasname \ + __attribute__ ((weak, alias ("__memcmp_ppc"))); +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__memcmp_ppc, __GI_memcmp, __memcmp_ppc); +#endif + +extern __typeof (memcmp) __memcmp_ppc attribute_hidden; + +#include <string/memcmp.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp.c new file mode 100644 index 0000000000..0d315d5e70 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp.c @@ -0,0 +1,44 @@ +/* Multiple versions of memcmp. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# define memcmp __redirect_memcmp +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (memcmp) __memcmp_ppc attribute_hidden; +extern __typeof (memcmp) __memcmp_power4 attribute_hidden; +extern __typeof (memcmp) __memcmp_power7 attribute_hidden; +extern __typeof (memcmp) __memcmp_power8 attribute_hidden; +# undef memcmp + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_memcmp, memcmp, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __memcmp_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memcmp_power7 + : (hwcap & PPC_FEATURE_POWER4) + ? __memcmp_power4 + : __memcmp_ppc); +#else +#include <string/memcmp.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S new file mode 100644 index 0000000000..a942287900 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC A2. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCPY __memcpy_a2 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/a2/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S new file mode 100644 index 0000000000..39aa30c729 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC/CELL. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define MEMCPY __memcpy_cell + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/cell/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S new file mode 100644 index 0000000000..6e7fea382b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC64/POWER4. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCPY __memcpy_power4 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power4/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S new file mode 100644 index 0000000000..40bcdb1161 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC/POWER6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCPY __memcpy_power6 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power6/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S new file mode 100644 index 0000000000..222936af63 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCPY __memcpy_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S new file mode 100644 index 0000000000..2dc644c809 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S @@ -0,0 +1,28 @@ +/* Default memcpy implementation for PowerPC64. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# define MEMCPY __memcpy_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +#include <sysdeps/powerpc/powerpc64/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy.c new file mode 100644 index 0000000000..9f4286c4fe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy.c @@ -0,0 +1,55 @@ +/* Multiple versions of memcpy. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in libc and for the + DSO. In static binaries we need memcpy before the initialization + has happened. */ +#if defined SHARED && IS_IN (libc) +/* Redefine memcpy so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below.
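+
+   Conceptually the selection is just (a hedged sketch, not the
+   macro's literal expansion):
+
+	static void *memcpy_resolver (void)	/* hypothetical name */
+	{
+	  return (hwcap & PPC_FEATURE_HAS_VSX) ? __memcpy_power7
+	    : (hwcap & PPC_FEATURE_ARCH_2_06) ? __memcpy_a2
+	    : ... /* power6, cell, power4 */ : __memcpy_ppc;
+	}
+
+   run once at relocation time to pick the routine calls will use.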
*/ +# undef memcpy +# define memcpy __redirect_memcpy +# include <string.h> +# include "init-arch.h" + +extern __typeof (__redirect_memcpy) __libc_memcpy; + +extern __typeof (__redirect_memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_power4 attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_cell attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_power6 attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_a2 attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_power7 attribute_hidden; + +libc_ifunc (__libc_memcpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memcpy_power7 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __memcpy_a2 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __memcpy_power6 : + (hwcap & PPC_FEATURE_CELL_BE) + ? __memcpy_cell : + (hwcap & PPC_FEATURE_POWER4) + ? __memcpy_power4 + : __memcpy_ppc); + +#undef memcpy +strong_alias (__libc_memcpy, memcpy); +libc_hidden_ver (__libc_memcpy, memcpy); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-power7.S new file mode 100644 index 0000000000..a9435fa654 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-power7.S @@ -0,0 +1,29 @@ +/* Optimized memmove implementation for PowerPC64/POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMMOVE __memmove_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef bcopy +#define bcopy __bcopy_power7 + +#include <sysdeps/powerpc/powerpc64/power7/memmove.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c new file mode 100644 index 0000000000..80353c5332 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c @@ -0,0 +1,44 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
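
The libc_ifunc selector in memcpy.c above is the heart of the multiarch scheme: it emits a STT_GNU_IFUNC symbol whose resolver inspects the AT_HWCAP bits supplied by the kernel and returns the newest implementation the CPU supports, tested newest-first. The same dispatch can be written with GCC's ifunc attribute; the following is a minimal sketch under assumed names, using getauxval where glibc's init-arch.h instead reads the hwcap word already saved by the dynamic loader (a resolver inside libc runs too early to call back into the library):

#include <stddef.h>
#include <sys/auxv.h>

/* Assumed value of PPC_FEATURE_HAS_VSX from the kernel's
   <asm/cputable.h>; defined locally to keep the sketch self-contained.  */
#define HWCAP_PPC_VSX 0x00000080

static void *
copy_generic (void *dst, const void *src, size_t n)
{
  char *d = dst;
  const char *s = src;
  while (n--)
    *d++ = *s++;
  return dst;
}

static void *
copy_vsx (void *dst, const void *src, size_t n)
{
  /* Stand-in body; a real variant would use VSX loads and stores.  */
  return copy_generic (dst, src, n);
}

/* The resolver runs once, during relocation; its result is patched into
   the GOT/PLT slot, so the hwcap test costs once per process rather
   than once per call.  */
static void *(*resolve_copy (void)) (void *, const void *, size_t)
{
  return (getauxval (AT_HWCAP) & HWCAP_PPC_VSX) ? copy_vsx : copy_generic;
}

void *my_copy (void *dst, const void *src, size_t n)
     __attribute__ ((ifunc ("resolve_copy")));
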
*/ + +#include <string.h> +#include <memcopy.h> + +extern __typeof (_wordcopy_fwd_aligned) _wordcopy_fwd_aligned_ppc; +extern __typeof (_wordcopy_fwd_dest_aligned) _wordcopy_fwd_dest_aligned_ppc; +extern __typeof (_wordcopy_bwd_aligned) _wordcopy_bwd_aligned_ppc; +extern __typeof (_wordcopy_bwd_dest_aligned) _wordcopy_bwd_dest_aligned_ppc; + +#define _wordcopy_fwd_aligned _wordcopy_fwd_aligned_ppc +#define _wordcopy_fwd_dest_aligned _wordcopy_fwd_dest_aligned_ppc +#define _wordcopy_bwd_aligned _wordcopy_bwd_aligned_ppc +#define _wordcopy_bwd_dest_aligned _wordcopy_bwd_dest_aligned_ppc + +extern __typeof (memmove) __memmove_ppc attribute_hidden; +#define MEMMOVE __memmove_ppc + +extern __typeof (memcpy) __memcpy_ppc attribute_hidden; +#ifdef SHARED +# define memcpy __memcpy_ppc +#endif + +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +#include <string/memmove.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove.c new file mode 100644 index 0000000000..db2bbc7837 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove.c @@ -0,0 +1,45 @@ +/* Multiple versions of memmove. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in lib and for + DSO. In static binaries we need memmove before the initialization + happened. */ +#if defined SHARED && IS_IN (libc) +/* Redefine memmove so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef memmove +# define memmove __redirect_memmove +# include <string.h> +# include "init-arch.h" + +extern __typeof (__redirect_memmove) __libc_memmove; + +extern __typeof (__redirect_memmove) __memmove_ppc attribute_hidden; +extern __typeof (__redirect_memmove) __memmove_power7 attribute_hidden; + +libc_ifunc (__libc_memmove, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memmove_power7 + : __memmove_ppc); + +#undef memmove +strong_alias (__libc_memmove, memmove); +libc_hidden_ver (__libc_memmove, memmove); +#else +# include <string/memmove.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-power7.S new file mode 100644 index 0000000000..08f133644a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-power7.S @@ -0,0 +1,28 @@ +/* Optimized mempcpy implementation for PowerPC/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMPCPY __mempcpy_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#include <sysdeps/powerpc/powerpc64/power7/mempcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-ppc64.c new file mode 100644 index 0000000000..d0741fe318 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-ppc64.c @@ -0,0 +1,19 @@ +/* PowerPC64 default implementation of mempcpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy.c new file mode 100644 index 0000000000..430557ee0a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy.c @@ -0,0 +1,44 @@ +/* Multiple versions of mempcpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define mempcpy __redirect_mempcpy +# define __mempcpy __redirect___mempcpy +# define NO_MEMPCPY_STPCPY_REDIRECT +/* Omit the mempcpy inline definitions because it would redefine mempcpy. 
*/ +# define _HAVE_STRING_ARCH_mempcpy 1 +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__mempcpy) __mempcpy_ppc attribute_hidden; +extern __typeof (__mempcpy) __mempcpy_power7 attribute_hidden; +# undef mempcpy +# undef __mempcpy + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect___mempcpy, __mempcpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __mempcpy_power7 + : __mempcpy_ppc); + +weak_alias (__mempcpy, mempcpy) +#else +# include <string/mempcpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-power7.S new file mode 100644 index 0000000000..052aa732ba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-power7.S @@ -0,0 +1,28 @@ +/* Optimized memrchr implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMRCHR __memrchr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power7/memrchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c new file mode 100644 index 0000000000..2fc706db71 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c @@ -0,0 +1,19 @@ +/* PowerPC64 default implementation of memrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr.c new file mode 100644 index 0000000000..fb09fdf89c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr.c @@ -0,0 +1,37 @@ +/* Multiple versions of memrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
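
The __redirect_ renaming in mempcpy.c above exists because <string.h> already declares mempcpy: redefining the name before the include lets the file borrow the declaration's type via __typeof while still defining the public symbol through the IFUNC machinery, and weak_alias/strong_alias then attach the user-visible names to the chosen definition. In essence (a simplification of glibc's libc-symbols.h, shown only to make the wrappers above readable):

/* Both names end up with one address; no wrapper call is emitted.  */
#define strong_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
#define weak_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((weak, alias (#name)));

This is also why the per-CPU wrapper files #undef libc_hidden_builtin_def and weak_alias: only the selected, ifunc-dispatched symbol should carry the public and internal names, so each __mempcpy_power7-style build stubs those macros out.
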
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__memrchr) __memrchr_ppc attribute_hidden; +extern __typeof (__memrchr) __memrchr_power7 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__memrchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memrchr_power7 + : __memrchr_ppc); + +weak_alias (__memrchr, memrchr) +#else +#include <string/memrchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S new file mode 100644 index 0000000000..3908e8e412 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S @@ -0,0 +1,29 @@ +/* Optimized memset implementation for PowerPC64/POWER4. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMSET __memset_power4 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power4 + +#include <sysdeps/powerpc/powerpc64/power4/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power6.S new file mode 100644 index 0000000000..4ddbd2e274 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power6.S @@ -0,0 +1,29 @@ +/* Optimized memset implementation for PowerPC64/POWER6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMSET __memset_power6 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power6 + +#include <sysdeps/powerpc/powerpc64/power6/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power7.S new file mode 100644 index 0000000000..97f686b35d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power7.S @@ -0,0 +1,28 @@ +/* Optimized memset implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMSET __memset_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power7 +#include <sysdeps/powerpc/powerpc64/power7/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S new file mode 100644 index 0000000000..ea303533f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S @@ -0,0 +1,29 @@ +/* Optimized memset implementation for PowerPC64/POWER8. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMSET __memset_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power8 + +#include <sysdeps/powerpc/powerpc64/power8/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S new file mode 100644 index 0000000000..0f16e21c61 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S @@ -0,0 +1,42 @@ +/* Default memset/bzero implementation for PowerPC64. + Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. NOTE: this code should be positioned + before ENTRY/END_GEN_TB redefinition. */ +ENTRY (__bzero_ppc) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END_GEN_TB (__bzero_ppc,TB_TOCLESS) + + +#if defined SHARED && IS_IN (libc) +# define MEMSET __memset_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +/* Do not implement __bzero at powerpc64/memset.S. */ +#define NO_BZERO_IMPL + +#include <sysdeps/powerpc/powerpc64/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset.c new file mode 100644 index 0000000000..a5d9b3e60e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset.c @@ -0,0 +1,53 @@ +/* Multiple versions of memset. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +/* Redefine memset so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef memset +# define memset __redirect_memset +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__redirect_memset) __libc_memset; + +extern __typeof (__redirect_memset) __memset_ppc attribute_hidden; +extern __typeof (__redirect_memset) __memset_power4 attribute_hidden; +extern __typeof (__redirect_memset) __memset_power6 attribute_hidden; +extern __typeof (__redirect_memset) __memset_power7 attribute_hidden; +extern __typeof (__redirect_memset) __memset_power8 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__libc_memset, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __memset_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memset_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __memset_power6 : + (hwcap & PPC_FEATURE_POWER4) + ? 
__memset_power4 + : __memset_ppc); + +#undef memset +strong_alias (__libc_memset, memset); +libc_hidden_ver (__libc_memset, memset); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-power7.S new file mode 100644 index 0000000000..d79d72820c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-power7.S @@ -0,0 +1,23 @@ +/* Optimized rawmemchr implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define RAWMEMCHR __rawmemchr_power7 + +#include <sysdeps/powerpc/powerpc64/power7/rawmemchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-ppc64.c new file mode 100644 index 0000000000..cb55dbcc50 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-ppc64.c @@ -0,0 +1,19 @@ +/* PowerPC64 default implementation of rawmemchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c new file mode 100644 index 0000000000..8bfd58dd47 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c @@ -0,0 +1,39 @@ +/* Multiple versions of rawmemchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
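
The __bzero_ppc stub in memset-ppc64.S above is three instructions: move the length into the size-argument register, zero the fill-byte argument, and branch straight into memset's body. Keeping the stub in the same object as memset is what its comment about linker stubs means: the branch target is guaranteed to be in range, so the linker never has to interpose a long-branch trampoline between the two entry points. The C-level shape of the same trick is just a tail call; a trivial sketch with a hypothetical name:

#include <string.h>

/* Rearrange the arguments and defer to memset.  At -O2 compilers
   normally emit this as a tail call (a plain "b memset" on powerpc64),
   so no extra stack frame or return is needed.  */
void
my_bzero (void *s, size_t n)
{
  memset (s, 0, n);
}
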
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define __rawmemchr __redirect___rawmemchr +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__rawmemchr) __rawmemchr_ppc attribute_hidden; +extern __typeof (__rawmemchr) __rawmemchr_power7 attribute_hidden; +# undef __rawmemchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect___rawmemchr, __rawmemchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __rawmemchr_power7 + : __rawmemchr_ppc); + +weak_alias (__rawmemchr, rawmemchr) +#else +#include <string/rawmemchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-memset.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-memset.c new file mode 100644 index 0000000000..7fb4b733e4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-memset.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc64/rtld-memset.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-strchr.S new file mode 100644 index 0000000000..16ba7264c4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-strchr.S @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc64/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c new file mode 100644 index 0000000000..e4b9ce9b6f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c @@ -0,0 +1,36 @@ +/* Multiarch stpcpy for POWER7/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; +extern __typeof (strlen) __strlen_power7 attribute_hidden; +extern __typeof (stpcpy) __stpcpy_power7 attribute_hidden; + +#define STPCPY __stpcpy_power7 +#define memcpy __memcpy_power7 +#define strlen __strlen_power7 + +#undef libc_hidden_def +#define libc_hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <string/stpcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power8.S new file mode 100644 index 0000000000..935347115a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power8.S @@ -0,0 +1,26 @@ +/* Optimized stpcpy implementation for POWER8/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STPCPY __stpcpy_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/stpcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c new file mode 100644 index 0000000000..b5a3b12c05 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c @@ -0,0 +1,37 @@ +/* Multiarch stpcpy for PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (strlen) __strlen_ppc attribute_hidden; +extern __typeof (stpcpy) __stpcpy_ppc attribute_hidden; + +#define STPCPY __stpcpy_ppc +#define memcpy __memcpy_ppc +#define strlen __strlen_ppc + +#undef weak_alias +#define weak_alias(name, aliasname) + +#undef libc_hidden_def +#define libc_hidden_def(name) +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <string/stpcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c new file mode 100644 index 0000000000..3e34e3cafe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c @@ -0,0 +1,41 @@ +/* Multiple versions of stpcpy. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +# define NO_MEMPCPY_STPCPY_REDIRECT +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__stpcpy) __stpcpy_ppc attribute_hidden; +extern __typeof (__stpcpy) __stpcpy_power7 attribute_hidden; +extern __typeof (__stpcpy) __stpcpy_power8 attribute_hidden; + +libc_ifunc_hidden (__stpcpy, __stpcpy, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __stpcpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __stpcpy_power7 + : __stpcpy_ppc); + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_def (stpcpy) +#else +# include <string/stpcpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power7.S new file mode 100644 index 0000000000..6636b01d07 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power7.S @@ -0,0 +1,30 @@ +/* Optimized stpncpy implementation for POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define USE_AS_STPNCPY + +#define STPNCPY __stpncpy_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define MEMSET __memset_power7 + +#include <sysdeps/powerpc/powerpc64/power7/stpncpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S new file mode 100644 index 0000000000..6ce706a879 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S @@ -0,0 +1,28 @@ +/* Optimized stpncpy implementation for POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define USE_AS_STPNCPY + +#define STPNCPY __stpncpy_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/stpncpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-ppc64.c new file mode 100644 index 0000000000..22186166a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-ppc64.c @@ -0,0 +1,26 @@ +/* Default stpncpy implementation for PowerPC64. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define STPNCPY __stpncpy_ppc +#ifdef SHARED +#undef libc_hidden_def +#define libc_hidden_def(name) \ + __hidden_ver1 (__stpncpy_ppc, __GI___stpncpy, __stpncpy_ppc); +#endif + +#include <string/stpncpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c new file mode 100644 index 0000000000..e9b37dcc9a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c @@ -0,0 +1,39 @@ +/* Multiple versions of stpncpy. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
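
The USE_AS_STPNCPY define in the stpncpy wrappers above follows a glibc-wide convention: one copy of the hard assembly serves both strncpy and stpncpy, which differ only in the value left in the return register, so each variant just sets a macro and the entry-point name before including the shared body. A C-shaped sketch of the convention, with hypothetical file and function names:

/* strncpy-body.c: shared implementation, parameterized by macros.  */
#include <string.h>

#ifndef FUNC_NAME
# define FUNC_NAME my_strncpy
#endif

char *
FUNC_NAME (char *dst, const char *src, size_t n)
{
  size_t i = 0;
  for (; i < n && src[i] != '\0'; i++)
    dst[i] = src[i];
  for (size_t j = i; j < n; j++)  /* strncpy semantics: NUL-pad to n */
    dst[j] = '\0';
#ifdef USE_AS_STPNCPY
  return dst + i;   /* stpncpy: first NUL written, or dst + n */
#else
  return dst;       /* strncpy: destination, unchanged */
#endif
}

The stpncpy build is then a three-line file:

#define USE_AS_STPNCPY
#define FUNC_NAME my_stpncpy
#include "strncpy-body.c"
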
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define stpncpy __redirect_stpncpy +# define __stpncpy __redirect___stpncpy +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__stpncpy) __stpncpy_ppc attribute_hidden; +extern __typeof (__stpncpy) __stpncpy_power7 attribute_hidden; +extern __typeof (__stpncpy) __stpncpy_power8 attribute_hidden; +# undef stpncpy +# undef __stpncpy + +libc_ifunc_redirected (__redirect___stpncpy, __stpncpy, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __stpncpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __stpncpy_power7 + : __stpncpy_ppc); +weak_alias (__stpncpy, stpncpy) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S new file mode 100644 index 0000000000..025c5a9f13 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S @@ -0,0 +1,28 @@ +/* Optimized strcasecmp implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strcasecmp __strcasecmp_power7 +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S new file mode 100644 index 0000000000..9b62476e09 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S @@ -0,0 +1,28 @@ +/* Optimized strcasecmp implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strcasecmp __strcasecmp_power8 +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c new file mode 100644 index 0000000000..cbf91755da --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c @@ -0,0 +1,21 @@ +/* Multiarch strcasecmp for PPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define strcasecmp __strcasecmp_ppc + +#include <string/strcasecmp.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c new file mode 100644 index 0000000000..dcb4ef4125 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c @@ -0,0 +1,36 @@ +/* Multiple versions of strcasecmp + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__strcasecmp) __libc_strcasecmp; + +extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; +extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; +extern __typeof (__strcasecmp) __strcasecmp_power8 attribute_hidden; + +libc_ifunc (__libc_strcasecmp, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcasecmp_power8: + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__strcasecmp_power7 + : __strcasecmp_ppc); + +weak_alias (__libc_strcasecmp, strcasecmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l-power7.S new file mode 100644 index 0000000000..da4c4054c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l-power7.S @@ -0,0 +1,31 @@ +/* Optimized strcasecmp_l implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strcasecmp __strcasecmp_l_power7 + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define USE_IN_EXTENDED_LOCALE_MODEL + +#include <sysdeps/powerpc/powerpc64/power7/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c new file mode 100644 index 0000000000..10b8f2e84d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c @@ -0,0 +1,40 @@ +/* Multiple versions of strcasecmp_l. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# define strcasecmp_l __strcasecmp_l_ppc +extern __typeof (__strcasecmp_l) __strcasecmp_l_ppc attribute_hidden; +extern __typeof (__strcasecmp_l) __strcasecmp_l_power7 attribute_hidden; +#endif + +#include <string/strcasecmp_l.c> +#undef strcasecmp_l + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strcasecmp_l) __libc_strcasecmp_l; +libc_ifunc (__libc_strcasecmp_l, + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__strcasecmp_l_power7 + : __strcasecmp_l_ppc); + +weak_alias (__libc_strcasecmp_l, strcasecmp_l) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S new file mode 100644 index 0000000000..2cfb5ae77a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S @@ -0,0 +1,35 @@ +/* Optimized strcasestr implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCASESTR __strcasestr_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +/* The following definitions are used in strcasestr optimization. */ + +/* strlen is used to calculate len of r4. */ +#define STRLEN __strlen_power8 +/* strnlen is used to check if len of r3 is more than r4. */ +#define STRNLEN __strnlen_power7 +/* strchr is used to check if first char of r4 is present in r3. */ +#define STRCHR __strchr_power8 + +#include <sysdeps/powerpc/powerpc64/power8/strcasestr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c new file mode 100644 index 0000000000..61f278f697 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c @@ -0,0 +1,34 @@ +/* PowerPC64 default implementation of strcasestr. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +#define STRCASESTR __strcasestr_ppc +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strcasestr_ppc, __GI_strcasestr, __strcasestr_ppc); +#endif + + +#undef weak_alias +#define weak_alias(a,b) + +extern __typeof (strcasestr) __strcasestr_ppc attribute_hidden; + +#include <string/strcasestr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c new file mode 100644 index 0000000000..9e6a16d6a9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c @@ -0,0 +1,37 @@ +/* Multiple versions of strcasestr. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strcasestr) __strcasestr_ppc attribute_hidden; +extern __typeof (__strcasestr) __strcasestr_power8 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__strcasestr, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcasestr_power8 + : __strcasestr_ppc); + +weak_alias (__strcasestr, strcasestr) +#else +#include <string/strcasestr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c new file mode 100644 index 0000000000..22d2caaec3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c @@ -0,0 +1,30 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. 
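
The POWER8 strcasestr above is built by composing the other optimized primitives, exactly as its comments say: strlen sizes the needle, strnlen rejects a haystack shorter than the needle, and strchr locates a candidate first byte before the expensive window comparison. The scalar algorithm being accelerated is essentially the classic loop; a reference sketch (not the glibc algorithm, and quadratic in the worst case):

#include <string.h>
#include <strings.h>  /* strncasecmp */

char *
my_strcasestr (const char *haystack, const char *needle)
{
  size_t nlen = strlen (needle);
  if (nlen == 0)
    return (char *) haystack;   /* empty needle matches at the start */
  for (; *haystack != '\0'; haystack++)
    if (strncasecmp (haystack, needle, nlen) == 0)
      return (char *) haystack;
  return NULL;
}
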
*/ + +#include <string.h> + +#define STRCAT __strcat_power7 + +#undef libc_hidden_def +#define libc_hidden_def(name) + +extern typeof (strcpy) __strcpy_power7; +extern typeof (strlen) __strlen_power7; + +#define strcpy __strcpy_power7 +#define strlen __strlen_power7 +#include <string/strcat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c new file mode 100644 index 0000000000..f138beec67 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c @@ -0,0 +1,30 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRCAT __strcat_power8 + +#undef libc_hidden_def +#define libc_hidden_def(name) + +extern typeof (strcpy) __strcpy_power8; +extern typeof (strlen) __strlen_power8; + +#define strcpy __strcpy_power8 +#define strlen __strlen_power8 +#include <string/strcat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c new file mode 100644 index 0000000000..5049fc03f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c @@ -0,0 +1,29 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRCAT __strcat_ppc +#ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcat_ppc, __GI_strcat, __strcat_ppc); +#endif + +extern __typeof (strcat) __strcat_ppc attribute_hidden; + +#include <string/strcat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat.c new file mode 100644 index 0000000000..3336aedcec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat.c @@ -0,0 +1,36 @@ +/* Multiple versions of strcat. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
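
The __hidden_ver1 (__strcat_ppc, __GI_strcat, __strcat_ppc) definition above — like the ".globl __GI_strchr; __GI_strchr = __strchr_ppc" idiom used in the assembly defaults further down — exists so that calls from inside libc bind to a hidden alias and never pay the PLT/ifunc indirection; the safe baseline variant is the one that backs that alias. Stripped of glibc's macros, the idiom reduces to roughly the following sketch (the _demo names and the stand-in body are hypothetical):

#include <string.h>

/* Hypothetical stand-in for the baseline variant.  */
char *
__strcat_ppc_demo (char *dest, const char *src)
{
  strcpy (dest + strlen (dest), src);
  return dest;
}

/* The hidden-alias idiom in essence: libc-internal references bind
   to the hidden name directly, bypassing the PLT and the ifunc.  */
extern __typeof (__strcat_ppc_demo) __GI_strcat_demo
  __attribute__ ((visibility ("hidden"), alias ("__strcat_ppc_demo")));
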
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define strcat __redirect_strcat +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strcat) __strcat_ppc attribute_hidden; +extern __typeof (strcat) __strcat_power7 attribute_hidden; +extern __typeof (strcat) __strcat_power8 attribute_hidden; +# undef strcat + +libc_ifunc_redirected (__redirect_strcat, strcat, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcat_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcat_power7 + : __strcat_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power7.S new file mode 100644 index 0000000000..e64c0b7c82 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power7.S @@ -0,0 +1,26 @@ +/* Optimized strchr implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCHR __strchr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S new file mode 100644 index 0000000000..bbda7b0505 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S @@ -0,0 +1,26 @@ +/* Optimized strchr implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCHR __strchr_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-ppc64.S new file mode 100644 index 0000000000..769f9f07d4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-ppc64.S @@ -0,0 +1,29 @@ +/* PowerPC64 default implementation of strchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifdef SHARED +# define STRCHR __strchr_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strchr; __GI_strchr = __strchr_ppc +#endif + +#include <sysdeps/powerpc/powerpc64/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr.c new file mode 100644 index 0000000000..573105818f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr.c @@ -0,0 +1,42 @@ +/* Multiple versions of strchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +# define strchr __redirect_strchr +/* Omit the strchr inline definitions because it would redefine strchr. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strchr) __strchr_ppc attribute_hidden; +extern __typeof (strchr) __strchr_power7 attribute_hidden; +extern __typeof (strchr) __strchr_power8 attribute_hidden; +# undef strchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strchr, strchr, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strchr_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__strchr_power7 + : __strchr_ppc); +weak_alias (strchr, index) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S new file mode 100644 index 0000000000..c8e28721fd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S @@ -0,0 +1,26 @@ +/* Optimized strchrnul implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCHRNUL __strchrnul_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strchrnul.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S new file mode 100644 index 0000000000..1cd39fc1b3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S @@ -0,0 +1,26 @@ +/* Optimized strchrnul implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCHRNUL __strchrnul_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strchrnul.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c new file mode 100644 index 0000000000..8d313c3e1d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c @@ -0,0 +1,19 @@ +/* PowerPC64 default implementation of strchrnul. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c new file mode 100644 index 0000000000..1e9018f88a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c @@ -0,0 +1,40 @@ +/* Multiple versions of strchrnul. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strchrnul) __strchrnul_ppc attribute_hidden; +extern __typeof (__strchrnul) __strchrnul_power7 attribute_hidden; +extern __typeof (__strchrnul) __strchrnul_power8 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__strchrnul, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strchrnul_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strchrnul_power7 + : __strchrnul_ppc); + +weak_alias (__strchrnul, strchrnul) +#else +#include <string/strchrnul.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power7.S new file mode 100644 index 0000000000..82d1b63af9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power7.S @@ -0,0 +1,26 @@ +/* Optimized strcmp implementation for POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define STRCMP __strcmp_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power8.S new file mode 100644 index 0000000000..b2464a8018 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power8.S @@ -0,0 +1,26 @@ +/* Optimized strcmp implementation for POWER8/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCMP __strcmp_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S new file mode 100644 index 0000000000..48ea05d2c5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S @@ -0,0 +1,26 @@ +/* Optimized strcmp implementation for POWER9/PPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCMP __strcmp_power9 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power9/strcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S new file mode 100644 index 0000000000..085e74758f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S @@ -0,0 +1,29 @@ +/* Default strcmp implementation for PowerPC64. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# define STRCMP __strcmp_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strcmp; __GI_strcmp = __strcmp_ppc +#endif /* SHARED && IS_IN */ + +#include <sysdeps/powerpc/powerpc64/strcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp.c new file mode 100644 index 0000000000..fc10205b00 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp.c @@ -0,0 +1,42 @@ +/* Multiple versions of strcmp. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +# define strcmp __redirect_strcmp +/* Omit the strcmp inline definitions because it would redefine strcmp. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strcmp) __strcmp_ppc attribute_hidden; +extern __typeof (strcmp) __strcmp_power7 attribute_hidden; +extern __typeof (strcmp) __strcmp_power8 attribute_hidden; +extern __typeof (strcmp) __strcmp_power9 attribute_hidden; + +# undef strcmp + +libc_ifunc_redirected (__redirect_strcmp, strcmp, + (hwcap2 & PPC_FEATURE2_ARCH_3_00) + ? __strcmp_power9 : + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcmp_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcmp_power7 + : __strcmp_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c new file mode 100644 index 0000000000..892a551183 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c @@ -0,0 +1,32 @@ +/* Multiarch strcpy for POWER7/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
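
libc_ifunc and libc_ifunc_redirected, used in strcmp.c above and throughout this directory, expand to a GNU indirect function: the dynamic linker runs a resolver once, at relocation time, and patches the chosen variant's address into place, so the hwcap tests cost nothing per call (and, per the recurring comment, the macros avoid attaching a DWARF definition DIE to the ifunc symbol so GDB handles it properly). Outside glibc the same mechanism can be sketched as follows — the my_strcmp* names and bodies are invented stand-ins, and glibc's own resolvers receive hwcap/hwcap2 from ld.so rather than calling getauxval:

#include <string.h>
#include <sys/auxv.h>

#ifndef PPC_FEATURE2_ARCH_2_07
# define PPC_FEATURE2_ARCH_2_07 0x80000000	/* as in <bits/hwcap.h> */
#endif

static int my_strcmp_power8 (const char *a, const char *b)
{ return strcmp (a, b); }		/* stand-in body */

static int my_strcmp_generic (const char *a, const char *b)
{ return strcmp (a, b); }		/* stand-in body */

/* The resolver runs once during relocation; whatever it returns
   becomes the target of every later my_strcmp call.  */
int (*my_strcmp_resolver (void)) (const char *, const char *)
{
  unsigned long hwcap2 = getauxval (AT_HWCAP2);

  return (hwcap2 & PPC_FEATURE2_ARCH_2_07)
	 ? my_strcmp_power8
	 : my_strcmp_generic;
}

int my_strcmp (const char *, const char *)
  __attribute__ ((ifunc ("my_strcmp_resolver")));
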
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; +extern __typeof (strlen) __strlen_power7 attribute_hidden; +extern __typeof (strcpy) __strcpy_power7 attribute_hidden; + +#define STRCPY __strcpy_power7 +#define memcpy __memcpy_power7 +#define strlen __strlen_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <string/strcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power8.S new file mode 100644 index 0000000000..6c753b5d1a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power8.S @@ -0,0 +1,26 @@ +/* Optimized strcpy implementation for POWER8/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCPY __strcpy_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c new file mode 100644 index 0000000000..cd6dd09541 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c @@ -0,0 +1,35 @@ +/* Multiarch strcpy for PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +#if defined SHARED && IS_IN (libc) +extern __typeof (memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (strlen) __strlen_ppc attribute_hidden; +extern __typeof (strcpy) __strcpy_ppc attribute_hidden; + +# define STRCPY __strcpy_ppc +# define memcpy __memcpy_ppc +# define strlen __strlen_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcpy_ppc, __GI_strcpy, __strcpy_ppc); +#endif + +#include <string/strcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy.c new file mode 100644 index 0000000000..0da53e30b0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy.c @@ -0,0 +1,36 @@ +/* Multiple versions of strcpy. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +# define strcpy __redirect_strcpy +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strcpy) __strcpy_ppc attribute_hidden; +extern __typeof (strcpy) __strcpy_power7 attribute_hidden; +extern __typeof (strcpy) __strcpy_power8 attribute_hidden; +#undef strcpy + +libc_ifunc_redirected (__redirect_strcpy, strcpy, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcpy_power7 + : __strcpy_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S new file mode 100644 index 0000000000..39b4cd8239 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S @@ -0,0 +1,25 @@ +/* Optimized strcspn implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define STRSPN __strcspn_power8 +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcspn.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c new file mode 100644 index 0000000000..96396af125 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c @@ -0,0 +1,26 @@ +/* Default strcspn implementation for PowerPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define STRCSPN __strcspn_ppc + +#ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) +#endif + +#include <string/strcspn.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn.c new file mode 100644 index 0000000000..a6df885181 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn.c @@ -0,0 +1,35 @@ +/* Multiple versions of strcspn. PowerPC64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <shlib-compat.h> +#include "init-arch.h" + +#undef strcspn +extern __typeof (strcspn) __libc_strcspn; + +extern __typeof (strcspn) __strcspn_ppc attribute_hidden; +extern __typeof (strcspn) __strcspn_power8 attribute_hidden; + +libc_ifunc (__libc_strcspn, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcspn_power8 + : __strcspn_ppc); + +weak_alias (__libc_strcspn, strcspn) +libc_hidden_builtin_def (strcspn) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power7.S new file mode 100644 index 0000000000..333496efa5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power7.S @@ -0,0 +1,26 @@ +/* Optimized strlen implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRLEN __strlen_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S new file mode 100644 index 0000000000..b4deea5f93 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S @@ -0,0 +1,26 @@ +/* Optimized strlen implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRLEN __strlen_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S new file mode 100644 index 0000000000..13231b8c64 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S @@ -0,0 +1,28 @@ +/* Default strlen implementation for PowerPC64. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# define STRLEN __strlen_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +#include <sysdeps/powerpc/powerpc64/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen.c new file mode 100644 index 0000000000..a5a7b59558 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen.c @@ -0,0 +1,44 @@ +/* Multiple versions of strlen. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +/* Redefine strlen so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef strlen +# define strlen __redirect_strlen +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__redirect_strlen) __libc_strlen; + +extern __typeof (__redirect_strlen) __strlen_ppc attribute_hidden; +extern __typeof (__redirect_strlen) __strlen_power7 attribute_hidden; +extern __typeof (__redirect_strlen) __strlen_power8 attribute_hidden; + +libc_ifunc (__libc_strlen, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strlen_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strlen_power7 + : __strlen_ppc); + +#undef strlen +strong_alias (__libc_strlen, strlen) +libc_hidden_ver (__libc_strlen, strlen) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c new file mode 100644 index 0000000000..177da4a2f0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c @@ -0,0 +1,24 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
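
The redirect at the top of strlen.c exists only to appease the compiler: <string.h> declares strlen with a prototype and attributes that would clash with the selector symbol strong_alias creates, so the header is included with the name temporarily rewritten and every variant is declared against the resulting __redirect_strlen type. Condensed, with hypothetical _demo names and a plain function standing in for the libc_ifunc selector, the pattern looks like this:

/* Rename the public symbol before pulling in the header, so the
   prototype and attributes from <string.h> attach to the redirect
   name rather than to `strlen' itself.  */
#define strlen __redirect_strlen
#include <string.h>
#undef strlen

/* Hypothetical stand-in for one of the variants.  */
static size_t
my_strlen_portable (const char *s)
{
  const char *p = s;

  while (*p != '\0')
    p++;
  return (size_t) (p - s);
}

/* Stand-in for the selector that libc_ifunc would define.  */
size_t
__libc_strlen_demo (const char *s)
{
  return my_strlen_portable (s);
}

/* strong_alias (__libc_strlen, strlen) boils down to essentially
   this: the alias takes __redirect_strlen's type, so gcc sees no
   mismatch with the declaration from the header.  */
extern __typeof (__redirect_strlen) strlen
  __attribute__ ((alias ("__libc_strlen_demo")));
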
*/ + +#include <string.h> + +#define __strncasecmp __strncasecmp_power7 + +extern __typeof (strncasecmp) __strncasecmp_power7 attribute_hidden; + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S new file mode 100644 index 0000000000..8a24c34719 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S @@ -0,0 +1,28 @@ +/* Optimized strncasecmp implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strncasecmp __strncasecmp_power8 +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strncase.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c new file mode 100644 index 0000000000..0a75f75745 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c @@ -0,0 +1,21 @@ +/* Multiarch strncasecmp for PPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define strncasecmp __strncasecmp_ppc + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase.c new file mode 100644 index 0000000000..197f7133e4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase.c @@ -0,0 +1,36 @@ +/* Multiple versions of strncasecmp + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__strncasecmp) __libc_strncasecmp; + +extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; +extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; +extern __typeof (__strncasecmp) __strncasecmp_power8 attribute_hidden; + +libc_ifunc (__libc_strncasecmp, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncasecmp_power8: + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncasecmp_power7 + : __strncasecmp_ppc); + +weak_alias (__libc_strncasecmp, strncasecmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c new file mode 100644 index 0000000000..f87ff6c640 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c @@ -0,0 +1,25 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define __strncasecmp_l __strncasecmp_l_power7 +#define USE_IN_EXTENDED_LOCALE_MODEL 1 + +extern __typeof (strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c new file mode 100644 index 0000000000..6c2429c58a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c @@ -0,0 +1,42 @@ +/* Multiple versions of strncasecmp_l + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if IS_IN (libc) +# include <string.h> +# define strncasecmp_l __strncasecmp_l_ppc +extern __typeof (__strncasecmp_l) __strncasecmp_l_ppc attribute_hidden; +extern __typeof (__strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; +#endif + +#include <string/strncase_l.c> +#undef strncasecmp_l + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__strncasecmp_l) __libc_strncasecmp_l; +libc_ifunc (__libc_strncasecmp_l, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncasecmp_l_power7 + : __strncasecmp_l_ppc); + +weak_alias (__libc_strncasecmp_l, strncasecmp_l) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c new file mode 100644 index 0000000000..f695f834a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c @@ -0,0 +1,31 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRNCAT __strncat_power7 + +extern __typeof (strncat) __strncat_power7 attribute_hidden; +extern __typeof (strlen) __strlen_power7 attribute_hidden; +extern __typeof (strnlen) __strnlen_power7 attribute_hidden; +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; + +#define strlen __strlen_power7 +#define __strnlen __strnlen_power7 +#define memcpy __memcpy_power7 + +#include <string/strncat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c new file mode 100644 index 0000000000..1ec1259b95 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c @@ -0,0 +1,31 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. 
*/ + +#include <string.h> + +#define STRNCAT __strncat_power8 + +extern __typeof (strncat) __strncat_power8 attribute_hidden; +extern __typeof (strlen) __strlen_power8 attribute_hidden; +extern __typeof (strnlen) __strnlen_power8 attribute_hidden; +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; + +#define strlen __strlen_power8 +#define __strnlen __strnlen_power8 +#define memcpy __memcpy_power7 + +#include <string/strncat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c new file mode 100644 index 0000000000..e4c8c01105 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c @@ -0,0 +1,29 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRNCAT __strncat_ppc +#ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strncat_ppc, __GI_strncat, __strncat_ppc); +#endif + +extern __typeof (strncat) __strncat_ppc attribute_hidden; + +#include <string/strncat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat.c new file mode 100644 index 0000000000..72f283354e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat.c @@ -0,0 +1,34 @@ +/* Multiple versions of strncat. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncat) __strncat_ppc attribute_hidden; +extern __typeof (strncat) __strncat_power7 attribute_hidden; +extern __typeof (strncat) __strncat_power8 attribute_hidden; + +libc_ifunc (strncat, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncat_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__strncat_power7 + : __strncat_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S new file mode 100644 index 0000000000..01729a3bba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCMP __strncmp_power4 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power4/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S new file mode 100644 index 0000000000..a069d4b21c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCMP __strncmp_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power8.S new file mode 100644 index 0000000000..3cbcaada62 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power8.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCMP __strncmp_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S new file mode 100644 index 0000000000..6d0deaa6e6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCMP __strncmp_power9 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power9/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S new file mode 100644 index 0000000000..e4b93ae8f2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S @@ -0,0 +1,28 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# define STRNCMP __strncmp_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strncmp; __GI_strncmp = __strncmp_ppc +#endif + +#include <sysdeps/powerpc/powerpc64/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp.c new file mode 100644 index 0000000000..14122c65a4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp.c @@ -0,0 +1,47 @@ +/* Multiple versions of strncmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +# define strncmp __redirect_strncmp +/* Omit the strncmp inline definitions because it would redefine strncmp. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncmp) __strncmp_ppc attribute_hidden; +extern __typeof (strncmp) __strncmp_power4 attribute_hidden; +extern __typeof (strncmp) __strncmp_power7 attribute_hidden; +extern __typeof (strncmp) __strncmp_power8 attribute_hidden; +extern __typeof (strncmp) __strncmp_power9 attribute_hidden; +# undef strncmp + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strncmp, strncmp, + (hwcap2 & PPC_FEATURE2_ARCH_3_00) + ? __strncmp_power9 : + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncmp_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncmp_power7 + : (hwcap & PPC_FEATURE_POWER4) + ? __strncmp_power4 + : __strncmp_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power7.S new file mode 100644 index 0000000000..03f7f83448 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power7.S @@ -0,0 +1,28 @@ +/* Optimized strncpy implementation for POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCPY __strncpy_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define MEMSET __memset_power7 + +#include <sysdeps/powerpc/powerpc64/power7/strncpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S new file mode 100644 index 0000000000..17117eb7ec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S @@ -0,0 +1,29 @@ +/* Optimized strncpy implementation for POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCPY __strncpy_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +/* memset is used to pad the end of the string. */ +#define MEMSET __memset_power8 + +#include <sysdeps/powerpc/powerpc64/power8/strncpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c new file mode 100644 index 0000000000..32412974aa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRNCPY __strncpy_ppc +#undef weak_alias +#define weak_alias(name, aliasname) \ + extern __typeof (__strncpy_ppc) aliasname \ + __attribute__ ((weak, alias ("__strncpy_ppc"))); +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strncpy_ppc, __GI_strncpy, __strncpy_ppc); +#endif + +extern __typeof (strncpy) __strncpy_ppc attribute_hidden; + +#include <string/strncpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy.c new file mode 100644 index 0000000000..bb63c185e6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy.c @@ -0,0 +1,42 @@ +/* Multiple versions of strncpy. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# define strncpy __redirect_strncpy +/* Omit the strncpy inline definitions because it would redefine strncpy. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncpy) __strncpy_ppc attribute_hidden; +extern __typeof (strncpy) __strncpy_power7 attribute_hidden; +extern __typeof (strncpy) __strncpy_power8 attribute_hidden; +# undef strncpy + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strncpy, strncpy, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncpy_power7 + : __strncpy_ppc); + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power7.S new file mode 100644 index 0000000000..2f0a183e31 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power7.S @@ -0,0 +1,28 @@ +/* Optimized strnlen version for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNLEN __strnlen_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#include <sysdeps/powerpc/powerpc64/power7/strnlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power8.S new file mode 100644 index 0000000000..ccea15df10 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power8.S @@ -0,0 +1,28 @@ +/* Optimized strnlen version for POWER8. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define __strnlen __strnlen_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#include <sysdeps/powerpc/powerpc64/power8/strnlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-ppc64.c new file mode 100644 index 0000000000..708455a156 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen.c new file mode 100644 index 0000000000..7f89132aa5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen.c @@ -0,0 +1,41 @@ +/* Multiple versions of strnlen. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define strnlen __redirect_strnlen +# define __strnlen __redirect___strnlen +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strnlen) __strnlen_ppc attribute_hidden; +extern __typeof (__strnlen) __strnlen_power7 attribute_hidden; +extern __typeof (__strnlen) __strnlen_power8 attribute_hidden; +# undef strnlen +# undef __strnlen +libc_ifunc_redirected (__redirect___strnlen, __strnlen, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strnlen_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strnlen_power7 + : __strnlen_ppc); +weak_alias (__strnlen, strnlen) + +#else +#include <string/strnlen.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S new file mode 100644 index 0000000000..10bab2ec54 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S @@ -0,0 +1,26 @@ +/* Optimized strrchr implementation for POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRRCHR __strrchr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strrchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power8.S new file mode 100644 index 0000000000..23365a1446 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power8.S @@ -0,0 +1,39 @@ +/* Optimized strrchr implementation for POWER8. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .section ".text"; \ + ENTRY_2(__strrchr_power8) \ + .align ALIGNARG(2); \ + BODY_LABEL(__strrchr_power8): \ + cfi_startproc; \ + LOCALENTRY(__strrchr_power8) + +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(__strrchr_power8) \ + END_2(__strrchr_power8) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strrchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c new file mode 100644 index 0000000000..62b77a0bbe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +#define STRRCHR __strrchr_ppc + +#undef weak_alias +#define weak_alias(name, aliasname) + +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strrchr_ppc, __GI_strrchr, __strrchr_ppc); +#endif + +extern __typeof (strrchr) __strrchr_ppc attribute_hidden; + +#include <string/strrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr.c new file mode 100644 index 0000000000..0f94c9d6a1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr.c @@ -0,0 +1,40 @@ +/* Multiple versions of strrchr. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# define strrchr __redirect_strrchr +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strrchr) __strrchr_ppc attribute_hidden; +extern __typeof (strrchr) __strrchr_power7 attribute_hidden; +extern __typeof (strrchr) __strrchr_power8 attribute_hidden; +#undef strrchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strrchr, strrchr, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strrchr_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strrchr_power7 + : __strrchr_ppc); +weak_alias (strrchr, rindex) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S new file mode 100644 index 0000000000..f8487f1cbc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S @@ -0,0 +1,25 @@ +/* Optimized strspn implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define STRSPN __strspn_power8 +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strspn.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c new file mode 100644 index 0000000000..53d3d61651 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c @@ -0,0 +1,25 @@ +/* Default strspn implementation for PowerPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define STRSPN __strspn_ppc +#ifdef SHARED +#undef libc_hidden_def +#define libc_hidden_def(name) +#endif + +#include <string/strspn.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn.c new file mode 100644 index 0000000000..0957482766 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn.c @@ -0,0 +1,35 @@ +/* Multiple versions of strspn. PowerPC64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +#undef strspn +extern __typeof (strspn) __libc_strspn; + +extern __typeof (strspn) __strspn_ppc attribute_hidden; +extern __typeof (strspn) __strspn_power8 attribute_hidden; + +libc_ifunc (__libc_strspn, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strspn_power8 + : __strspn_ppc); + +weak_alias (__libc_strspn, strspn) +libc_hidden_builtin_def (strspn) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S new file mode 100644 index 0000000000..3991df74a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S @@ -0,0 +1,30 @@ +/* Optimized strstr implementation for POWER7. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRSTR __strstr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define STRLEN __strlen_power7 +#define STRNLEN __strnlen_power7 +#define STRCHR __strchr_power7 + +#include <sysdeps/powerpc/powerpc64/power7/strstr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c new file mode 100644 index 0000000000..37add12c87 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c @@ -0,0 +1,29 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRSTR __strstr_ppc +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strstr_ppc, __GI_strstr, __strstr_ppc); +#endif + +extern __typeof (strstr) __strstr_ppc attribute_hidden; + +#include <string/strstr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr.c new file mode 100644 index 0000000000..d903b2702b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr.c @@ -0,0 +1,36 @@ +/* Multiple versions of strstr. PowerPC64 version. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. 
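The ifunc resolver below runs once, when the dynamic loader resolves the strstr symbol, so the hwcap check is paid at relocation time rather than on every call.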
*/ +#if IS_IN (libc) +# define strstr __redirect_strstr +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strstr) __strstr_ppc attribute_hidden; +extern __typeof (strstr) __strstr_power7 attribute_hidden; +# undef strstr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strstr, strstr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strstr_power7 + : __strstr_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power6.c new file mode 100644 index 0000000000..080cb696a7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power6.c @@ -0,0 +1,19 @@ +/* wcschr.c - Wide Character Search for powerpc64/power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power6.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power7.c new file mode 100644 index 0000000000..8f4de0e857 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power7.c @@ -0,0 +1,19 @@ +/* wcschr.c - Wide Character Search for powerpc64/power7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-ppc64.c new file mode 100644 index 0000000000..e781e947fe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr.c new file mode 100644 index 0000000000..ca373e096f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr.c @@ -0,0 +1,43 @@ +/* Multiple versions of wcschr + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define wcschr __redirect_wcschr +# define __wcschr __redirect___wcschr +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcschr) __wcschr_ppc attribute_hidden; +extern __typeof (wcschr) __wcschr_power6 attribute_hidden; +extern __typeof (wcschr) __wcschr_power7 attribute_hidden; +# undef wcschr +# undef __wcschr + +libc_ifunc_redirected (__redirect___wcschr, __wcschr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcschr_power7 + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcschr_power6 + : __wcschr_ppc); +weak_alias (__wcschr, wcschr) +#else +#undef libc_hidden_def +#define libc_hidden_def(a) +#include <wcsmbs/wcschr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power6.c new file mode 100644 index 0000000000..89d8a39640 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power6.c @@ -0,0 +1,19 @@ +/* wcscpy.c - Wide Character Copy for powerpc64/power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power6.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power7.c new file mode 100644 index 0000000000..47ba73b2cc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power7.c @@ -0,0 +1,19 @@ +/* wcscpy.c - Wide Character Copy for powerpc64/power7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-ppc64.c new file mode 100644 index 0000000000..1924b235ef --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c new file mode 100644 index 0000000000..13e44afb09 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c @@ -0,0 +1,36 @@ +/* Multiple versions of wcscpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcscpy) __wcscpy_ppc attribute_hidden; +extern __typeof (wcscpy) __wcscpy_power6 attribute_hidden; +extern __typeof (wcscpy) __wcscpy_power7 attribute_hidden; + +libc_ifunc (wcscpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcscpy_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcscpy_power6 + : __wcscpy_ppc); +#else +#include <wcsmbs/wcscpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power6.c new file mode 100644 index 0000000000..5dc448b339 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power6.c @@ -0,0 +1,19 @@ +/* wcsrchr.c - Wide Character Search for powerpc64/power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power6.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power7.c new file mode 100644 index 0000000000..fa25aa0475 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power7.c @@ -0,0 +1,19 @@ +/* wcsrchr.c - Wide Character Search for powerpc64/power7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-ppc64.c new file mode 100644 index 0000000000..8a913412a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr.c new file mode 100644 index 0000000000..07590f5a90 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr.c @@ -0,0 +1,36 @@ +/* Multiple versions of wcsrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcsrchr) __wcsrchr_ppc attribute_hidden; +extern __typeof (wcsrchr) __wcsrchr_power6 attribute_hidden; +extern __typeof (wcsrchr) __wcsrchr_power7 attribute_hidden; + +libc_ifunc (wcsrchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcsrchr_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcsrchr_power6 + : __wcsrchr_ppc); +#else +#include <wcsmbs/wcsrchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wordcopy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wordcopy-ppc64.c new file mode 100644 index 0000000000..078156f5d9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wordcopy-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Implies new file mode 100644 index 0000000000..a372141bb7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Implies @@ -0,0 +1,2 @@ +powerpc/power4/fpu +powerpc/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Makefile new file mode 100644 index 0000000000..ba06adb5d0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Makefile @@ -0,0 +1,6 @@ +# Makefile fragment for POWER4/5/5+. + +ifeq ($(subdir),string) +CFLAGS-wordcopy.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops +CFLAGS-memmove.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/Implies new file mode 100644 index 0000000000..c1f617b7da --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/multiarch/Implies new file mode 100644 index 0000000000..8d6531a174 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcmp.S new file mode 100644 index 0000000000..6ca98e909c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcmp.S @@ -0,0 +1,1369 @@ +/* Optimized memcmp implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +#ifndef MEMCMP +# define MEMCMP memcmp +#endif + + .machine power4 +EALIGN (MEMCMP, 4, 0) + CALL_MCOUNT 3 + +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r8 /* next word in s1 */ +#define rWORD4 r9 /* next word in s2 */ +#define rWORD5 r10 /* next word in s1 */ +#define rWORD6 r11 /* next word in s2 */ +#define rWORD7 r30 /* next word in s1 */ +#define rWORD8 r31 /* next word in s2 */ + + xor r0, rSTR2, rSTR1 + cmpldi cr6, rN, 0 + cmpldi cr1, rN, 12 + clrldi. 
r0, r0, 61 + clrldi r12, rSTR1, 61 + cmpldi cr5, r12, 0 + beq- cr6, L(zeroLength) + dcbt 0, rSTR1 + dcbt 0, rSTR2 +/* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) + std rWORD8, -8(r1) + std rWORD7, -16(r1) + cfi_offset(rWORD8, -8) + cfi_offset(rWORD7, -16) + bne L(unaligned) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then we are already double word + aligned and can perform the DW aligned loop. + + Otherwise we know the two strings have the same alignment (but not + yet DW). So we force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DW aligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ + .align 4 +L(samealignment): + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + beq cr5, L(DWaligned) + add rN, rN, r12 + sldi rWORD6, r12, 3 + srdi r0, rN, 5 /* Divide by 32 */ + andi. r12, rN, 24 /* Get the DW remainder */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) +#endif + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dPs4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + +/* Remainder is 8 */ + .align 3 +L(dsP1): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + b L(dP1e) +/* Remainder is 16 */ + .align 4 +L(dPs2): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 8(rSTR1) + ld rWORD8, 8(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + b L(dP2e) +/* Remainder is 24 */ + .align 4 +L(dPs3): + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD2, rWORD6 + cmpld cr1, rWORD3, rWORD4 + b L(dP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dPs4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD2, rWORD6 + cmpld cr7, rWORD1, rWORD2 + b L(dP4e) + +/* At this point we know both strings are double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWaligned): + andi. 
r12, rN, 24 /* Get the DW remainder */ + srdi r0, rN, 5 /* Divide by 32 */ + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) + +/* Remainder is 8 */ + .align 4 +L(dP1): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 0(rSTR1) + ld rWORD6, 0(rSTR2) +#endif + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 +L(dP1e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5x) + bne cr7, L(dLcr7x) + +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 32(rSTR1) + ldu rWORD8, 32(rSTR2) +#endif + bne cr1, L(dLcr1) + cmpld cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + .align 3 +L(dP1x): + sldi. r12, rN, 3 + bne cr5, L(dLcr5x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + li rRTN, 0 + blr + +/* Remainder is 16 */ + .align 4 +L(dP2): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 0(rSTR1) + ld rWORD6, 0(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 8(rSTR1) + ld rWORD8, 8(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 +L(dP2e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 16(rSTR1) + ld rWORD2, 16(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 24(rSTR1) + ld rWORD4, 24(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) +/* Again we are on an early exit path (16-23 byte compare), so we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP2x): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 8(rSTR1) + ld rWORD4, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + sldi. 
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr1, L(dLcr1x)
+ subfic rN, r12, 64	/* Shift count is 64 - (rN * 8). */
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Remainder is 24 */
+ .align 4
+L(dP3):
+ mtctr r0	/* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 0(rSTR1)
+ ld rWORD4, 0(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+L(dP3e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 8(rSTR1)
+ ld rWORD6, 8(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 16(rSTR1)
+ ld rWORD8, 16(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 24(rSTR1)
+ ld rWORD2, 24(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+ bne cr1, L(dLcr1)
+ bne cr6, L(dLcr6)
+ b L(dLoop1)
+/* Again we are on an early exit path (24-31 byte compare), so we want
+ to use only volatile registers and avoid restoring non-volatile
+ registers. */
+ .align 4
+L(dP3x):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ sldi. r12, rN, 3
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+ bne cr6, L(dLcr6x)
+ subfic rN, r12, 64	/* Shift count is 64 - (rN * 8).
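+			   For example, with 3 remaining bytes r12 is 24,
+			   so the shift count is 64 - 24 = 40; shifting the
+			   doubleword right by 40 bits keeps exactly the
+			   3 significant bytes.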
*/ + bne cr7, L(dLcr7x) + bne L(d00) + li rRTN, 0 + blr + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dP4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 +L(dP4e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 8(rSTR1) + ld rWORD4, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 16(rSTR1) + ld rWORD6, 16(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 24(rSTR1) + ldu rWORD8, 24(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(dLoop): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +L(dLoop1): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) +L(dLoop2): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) +L(dLoop3): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 32(rSTR1) + ldu rWORD8, 32(rSTR2) +#endif + bne- cr1, L(dLcr1) + cmpld cr7, rWORD1, rWORD2 + bdnz+ L(dLoop) + +L(dL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + cmpld cr5, rWORD7, rWORD8 +L(d44): + bne cr7, L(dLcr7) +L(d34): + bne cr1, L(dLcr1) +L(d24): + bne cr6, L(dLcr6) +L(d14): + sldi. r12, rN, 3 + bne cr5, L(dLcr5) +L(d04): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + beq L(zeroLength) +/* At this point we have a remainder of 1 to 7 bytes to compare. Since + we are aligned it is safe to load the whole double word, and use + shift right double to eliminate bits beyond the compare length. 
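+
+   As a rough C sketch of this step (with invented names, for
+   illustration only):
+
+     uint64_t w1 = load_dw (s1 + 8), w2 = load_dw (s2 + 8);
+     w1 >>= 64 - 8 * rem;	/* rem = remaining bytes, 1..7 */
+     w2 >>= 64 - 8 * rem;	/* discard bits beyond the length */
+     return w1 == w2 ? 0 : w1 > w2 ? 1 : -1;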
*/
+L(d00):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd rWORD1, rWORD1, rN
+ srd rWORD2, rWORD2, rN
+ cmpld cr7, rWORD1, rWORD2
+ bne cr7, L(dLcr7x)
+ li rRTN, 0
+ blr
+
+ .align 4
+L(dLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr7x):
+ li rRTN, 1
+ bgtlr cr7
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr1):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr1x):
+ li rRTN, 1
+ bgtlr cr1
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr6):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr6x):
+ li rRTN, 1
+ bgtlr cr6
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr5):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr5x):
+ li rRTN, 1
+ bgtlr cr5
+ li rRTN, -1
+ blr
+
+ .align 4
+L(bytealigned):
+ mtctr rN	/* Power4 wants mtctr 1st in dispatch group */
+#if 0
+/* Huh? We've already branched on cr6! */
+ beq- cr6, L(zeroLength)
+#endif
+
+/* We need to prime this loop. This loop is swing modulo scheduled
+ to avoid pipe delays. The dependent instruction latencies (load to
+ compare to conditional branch) are 2 to 3 cycles. In this loop each
+ dispatch group ends in a branch and takes 1 cycle. Effectively
+ the first iteration of the loop only serves to load operands, and
+ branches based on compares are delayed until the next iteration.
+
+ So we must precondition some registers and condition codes so that
+ we don't exit the loop early on the first iteration. */
+
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
+ bdz- L(b11)
+ cmpld cr7, rWORD1, rWORD2
+ lbz rWORD3, 1(rSTR1)
+ lbz rWORD4, 1(rSTR2)
+ bdz- L(b12)
+ cmpld cr1, rWORD3, rWORD4
+ lbzu rWORD5, 2(rSTR1)
+ lbzu rWORD6, 2(rSTR2)
+ bdz- L(b13)
+ .align 4
+L(bLoop):
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne- cr7, L(bLcr7)
+
+ cmpld cr6, rWORD5, rWORD6
+ bdz- L(b3i)
+
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- cr1, L(bLcr1)
+
+ cmpld cr7, rWORD1, rWORD2
+ bdz- L(b2i)
+
+ lbzu rWORD5, 1(rSTR1)
+ lbzu rWORD6, 1(rSTR2)
+ bne- cr6, L(bLcr6)
+
+ cmpld cr1, rWORD3, rWORD4
+ bdnz+ L(bLoop)
+
+/* We speculatively load bytes before we have tested the previous
+ bytes. But we must avoid overrunning the length (in the ctr) to
+ prevent these speculative loads from causing a segfault. In that
+ case the loop will exit early (before all the pending bytes are
+ tested). We must then complete the pending operations before
+ returning. */
+L(b1i):
+ bne- cr7, L(bLcr7)
+ bne- cr1, L(bLcr1)
+ b L(bx56)
+ .align 4
+L(b2i):
+ bne- cr6, L(bLcr6)
+ bne- cr7, L(bLcr7)
+ b L(bx34)
+ .align 4
+L(b3i):
+ bne- cr1, L(bLcr1)
+ bne- cr6, L(bLcr6)
+ b L(bx12)
+ .align 4
+L(bLcr7):
+ li rRTN, 1
+ bgtlr cr7
+ li rRTN, -1
+ blr
+L(bLcr1):
+ li rRTN, 1
+ bgtlr cr1
+ li rRTN, -1
+ blr
+L(bLcr6):
+ li rRTN, 1
+ bgtlr cr6
+ li rRTN, -1
+ blr
+
+L(b13):
+ bne- cr7, L(bx12)
+ bne- cr1, L(bx34)
+L(bx56):
+ sub rRTN, rWORD5, rWORD6
+ blr
+ nop
+L(b12):
+ bne- cr7, L(bx12)
+L(bx34):
+ sub rRTN, rWORD3, rWORD4
+ blr
+L(b11):
+L(bx12):
+ sub rRTN, rWORD1, rWORD2
+ blr
+ .align 4
+L(zeroLength):
+ li rRTN, 0
+ blr
+
+ .align 4
+/* At this point we know the strings have different alignment and the
+ compare length is at least 8 bytes. r12 contains the low order
+ 3 bits of rSTR1 and cr5 contains the result of the logical compare
+ of r12 to 0. If r12 == 0 then rSTR1 is double word
+ aligned and we can perform the DWunaligned loop.
+
+ Otherwise we know that rSTR1 is not yet DW aligned.
+ So we can force the string addresses to the next lower DW
+ boundary and special case this first DW using shift left to
+ eliminate bits preceding the first byte. Since we want to join the
+ normal (DWaligned) compare loop, starting at the second double word,
+ we need to adjust the length (rN) and special case the loop
+ versioning for the first DW. This ensures that the loop count is
+ correct and the first DW (shifted) is in the expected register pair. */
+#define rSHL r29	/* Unaligned shift left count. */
+#define rSHR r28	/* Unaligned shift right count. */
+#define rWORD8_SHIFT r27	/* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26	/* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25	/* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24	/* Left rotation temp for rWORD8. */
+L(unaligned):
+ std rSHL, -24(r1)
+ cfi_offset(rSHL, -24)
+ clrldi rSHL, rSTR2, 61
+ beq- cr6, L(duzeroLength)
+ std rSHR, -32(r1)
+ cfi_offset(rSHR, -32)
+ beq cr5, L(DWunaligned)
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
+/* Adjust the logical start of rSTR2 to compensate for the extra bits
+ in the 1st rSTR1 DW. */
+ sub rWORD8_SHIFT, rSTR2, r12
+/* But do not attempt to address the DW before the DW that contains
+ the actual start of rSTR2. */
+ clrrdi rSTR2, rSTR2, 3
+ std rWORD2_SHIFT, -48(r1)
+/* Compute the left/right shift counts for the unaligned rSTR2,
+ compensating for the logical (DW aligned) start of rSTR1. */
+ clrldi rSHL, rWORD8_SHIFT, 61
+ clrrdi rSTR1, rSTR1, 3
+ std rWORD4_SHIFT, -56(r1)
+ sldi rSHL, rSHL, 3
+ cmpld cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
+ cfi_offset(rWORD4_SHIFT, -56)
+ cfi_offset(rWORD6_SHIFT, -64)
+ subfic rSHR, rSHL, 64
+ srdi r0, rN, 5	/* Divide by 32 */
+ andi. r12, rN, 24	/* Get the DW remainder */
+/* We normally need to load 2 DWs to start the unaligned rSTR2, but in
+ this special case those bits may be discarded anyway. Also we
+ must avoid loading a DW where none of the bits are part of rSTR2 as
+ this may cross a page boundary and cause a page fault. */
+ li rWORD8, 0
+ blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD8, 0(rSTR2)
+ addi rSTR2, rSTR2, 8
+#endif
+ sld rWORD8, rWORD8, rSHL
+
+L(dus0):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+#endif
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ srd r12, rWORD2, rSHR
+ clrldi rN, rN, 61
+ beq L(duPs4)
+ mtctr r0	/* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
+ bgt cr1, L(duPs3)
+ beq cr1, L(duPs2)
+
+/* Remainder is 8 */
+ .align 4
+L(dusP1):
+ sld rWORD8_SHIFT, rWORD2, rSHL
+ sld rWORD7, rWORD1, rWORD6
+ sld rWORD8, rWORD8, rWORD6
+ bge cr7, L(duP1e)
+/* At this point we exit early with the first double word compare
+ complete and remainder of 0 to 7 bytes. See L(du14) for details on
+ how we handle the remaining bytes. */
+ cmpld cr5, rWORD7, rWORD8
+ sldi.
rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duPs2): + sld rWORD6_SHIFT, rWORD2, rSHL + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD8, rWORD6 + b L(duP2e) +/* Remainder is 24 */ + .align 4 +L(duPs3): + sld rWORD4_SHIFT, rWORD2, rSHL + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD8, rWORD6 + b L(duP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duPs4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + or rWORD8, r12, rWORD8 + sld rWORD2_SHIFT, rWORD2, rSHL + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD8, rWORD6 + b L(duP4e) + +/* At this point we know rSTR1 is double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWunaligned): + std rWORD8_SHIFT, -40(r1) + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, -48(r1) + srdi r0, rN, 5 /* Divide by 32 */ + std rWORD4_SHIFT, -56(r1) + andi. r12, rN, 24 /* Get the DW remainder */ + std rWORD6_SHIFT, -64(r1) + cfi_offset(rWORD8_SHIFT, -40) + cfi_offset(rWORD2_SHIFT, -48) + cfi_offset(rWORD4_SHIFT, -56) + cfi_offset(rWORD6_SHIFT, -64) + sldi rSHL, rSHL, 3 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD6, 0, rSTR2 + addi rSTR2, rSTR2, 8 + ldbrx rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD6, 0(rSTR2) + ldu rWORD8, 8(rSTR2) +#endif + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + subfic rSHR, rSHL, 64 + sld rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(duP3) + beq cr1, L(duP2) + +/* Remainder is 8 */ + .align 4 +L(duP1): + srd r12, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + addi rSTR1, rSTR1, 8 +#else + ld rWORD7, 0(rSTR1) +#endif + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) +L(duP1e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) + or rWORD4, r12, rWORD2_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + bne cr7, L(duLcr7) + or rWORD6, r0, rWORD4_SHIFT + cmpld cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmpld cr5, rWORD7, rWORD8 + sldi. 
rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duP2): + srd r0, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + addi rSTR1, rSTR1, 8 +#else + ld rWORD5, 0(rSTR1) +#endif + or rWORD6, r0, rWORD6_SHIFT + sld rWORD6_SHIFT, rWORD8, rSHL +L(duP2e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 8(rSTR1) + ld rWORD8, 8(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 16(rSTR1) + ld rWORD2, 16(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 24(rSTR1) + ld rWORD4, 24(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + cmpld cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmpld cr5, rWORD7, rWORD8 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Remainder is 24 */ + .align 4 +L(duP3): + srd r12, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + addi rSTR1, rSTR1, 8 +#else + ld rWORD3, 0(rSTR1) +#endif + sld rWORD4_SHIFT, rWORD8, rSHL + or rWORD4, r12, rWORD6_SHIFT +L(duP3e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 8(rSTR1) + ld rWORD6, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 16(rSTR1) + ld rWORD8, 16(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 24(rSTR1) + ld rWORD2, 24(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#endif + cmpld cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#endif +#if 0 +/* Huh? We've already branched on cr1! 
*/ + bne cr1, L(duLcr1) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duP4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + srd r0, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + addi rSTR1, rSTR1, 8 +#else + ld rWORD1, 0(rSTR1) +#endif + sld rWORD2_SHIFT, rWORD8, rSHL + or rWORD2, r0, rWORD6_SHIFT +L(duP4e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 8(rSTR1) + ld rWORD4, 8(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 16(rSTR1) + ld rWORD6, 16(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 24(rSTR1) + ldu rWORD8, 24(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + cmpld cr5, rWORD7, rWORD8 + bdz- L(du24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(duLoop): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +L(duLoop1): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +L(duLoop2): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +L(duLoop3): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 32(rSTR1) + ldu rWORD8, 32(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + bne- cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + bdnz+ L(duLoop) + +L(duL4): +#if 0 +/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) +#endif + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmpld cr5, rWORD7, rWORD8 +L(du44): + bne cr7, L(duLcr7) +L(du34): + bne cr1, L(duLcr1) +L(du24): + bne cr6, L(duLcr6) +L(du14): + sldi. rN, rN, 3 + bne cr5, L(duLcr5) +/* At this point we have a remainder of 1 to 7 bytes to compare. 
We use + shift right double to eliminate bits beyond the compare length. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rWORD8_SHIFT). */ + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + .align 4 +L(dutrim): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 +#else + ld rWORD1, 8(rSTR1) +#endif + ld rWORD8, -8(r1) + subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */ + or rWORD2, r0, rWORD8_SHIFT + ld rWORD7, -16(r1) + ld rSHL, -24(r1) + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + ld rSHR, -32(r1) + ld rWORD8_SHIFT, -40(r1) + li rRTN, 0 + cmpld cr7, rWORD1, rWORD2 + ld rWORD2_SHIFT, -48(r1) + ld rWORD4_SHIFT, -56(r1) + beq cr7, L(dureturn24) + li rRTN, 1 + ld rWORD6_SHIFT, -64(r1) + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(duLcr7): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + li rRTN, 1 + bgt cr7, L(dureturn29) + ld rSHL, -24(r1) + ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr1): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) + ld rSHL, -24(r1) + ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr6): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) + ld rSHL, -24(r1) + ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr5): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) + ld rSHL, -24(r1) + ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 3 +L(duZeroReturn): + li rRTN, 0 + .align 4 +L(dureturn): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) +L(dureturn29): + ld rSHL, -24(r1) + ld rSHR, -32(r1) +L(dureturn27): + ld rWORD8_SHIFT, -40(r1) +L(dureturn26): + ld rWORD2_SHIFT, -48(r1) +L(dureturn25): + ld rWORD4_SHIFT, -56(r1) +L(dureturn24): + ld rWORD6_SHIFT, -64(r1) + blr +L(duzeroLength): + li rRTN, 0 + blr + +END (MEMCMP) +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp, bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcopy.h b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcopy.h new file mode 100644 index 0000000000..9a4ff79f4a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcopy.h @@ -0,0 +1 @@ +#include "../../powerpc32/power4/memcopy.h" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcpy.S new file mode 100644 index 0000000000..2e96376b9f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcpy.S @@ -0,0 +1,477 @@ +/* Optimized memcpy implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'.
+
+ Memcpy handles short copies (< 32 bytes) using binary move blocks
+ (no loops) of lwz/stw. The tail (remaining 1-3 bytes) is handled
+ with the appropriate combination of byte and halfword load/stores.
+ There is minimal effort to optimize the alignment of short moves.
+ The 64-bit implementations of POWER3 and POWER4 do a reasonable job
+ of handling unaligned load/stores that do not cross 32-byte boundaries.
+
+ Longer moves (>= 32 bytes) justify the effort to get at least the
+ destination doubleword (8-byte) aligned. Further optimization is
+ possible when both source and destination are doubleword aligned.
+ Each case has an optimized unrolled loop. */
+
+#ifndef MEMCPY
+# define MEMCPY memcpy
+#endif
+ .machine power4
+EALIGN (MEMCPY, 5, 0)
+ CALL_MCOUNT 3
+
+ cmpldi cr1,5,31
+ neg 0,3
+ std 3,-16(1)
+ std 31,-8(1)
+ cfi_offset(31,-8)
+ andi. 11,3,7	/* check alignment of dst. */
+ clrldi 0,0,61	/* Number of bytes until the 1st doubleword of dst. */
+ clrldi 10,4,61	/* check alignment of src. */
+ cmpldi cr6,5,8
+ ble- cr1,.L2	/* If move < 32 bytes use short move code. */
+ cmpld cr6,10,11
+ mr 12,4
+ srdi 9,5,3	/* Number of full double words remaining. */
+ mtcrf 0x01,0
+ mr 31,5
+ beq .L0
+
+ subf 31,0,5
+ /* Move 0-7 bytes as needed to get the destination doubleword aligned. */
+1: bf 31,2f
+ lbz 6,0(12)
+ addi 12,12,1
+ stb 6,0(3)
+ addi 3,3,1
+2: bf 30,4f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+4: bf 29,0f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+0:
+ clrldi 10,12,61	/* check alignment of src again. */
+ srdi 9,31,3	/* Number of full double words remaining. */
+
+ /* Copy doublewords from source to destination, assuming the
+ destination is aligned on a doubleword boundary.
+
+ At this point we know there are at least 25 bytes left (32-7) to copy.
+ The next step is to determine if the source is also doubleword aligned.
+ If not, branch to the unaligned move code at .L6, which uses
+ a load, shift, store strategy.
+
+ Otherwise source and destination are doubleword aligned, and we can
+ use the optimized doubleword copy loop. */
+.L0:
+ clrldi 11,31,61
+ mtcrf 0x01,9
+ cmpldi cr1,11,0
+ bne- cr6,.L6	/* If source is not DW aligned. */
+
+ /* Move doublewords where destination and source are DW aligned.
+ Use an unrolled loop to copy 4 doublewords (32 bytes) per iteration.
+ If the copy is not an exact multiple of 32 bytes, 1-3
+ doublewords are copied as needed to set up the main loop. After
+ the main loop exits there may be a tail of 1-7 bytes. These bytes are
+ copied a word/halfword/byte at a time as needed to preserve alignment.
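+
+ As a rough C sketch (illustrative only), the aligned main loop
+ behaves like:
+
+   uint64_t *d = (uint64_t *) dst, *s = (uint64_t *) src;
+   while (n >= 32)
+     {
+       d[0] = s[0]; d[1] = s[1];
+       d[2] = s[2]; d[3] = s[3];
+       d += 4; s += 4; n -= 32;
+     }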
*/
+
+ srdi 8,31,5
+ cmpldi cr1,9,4
+ cmpldi cr6,11,0
+ mr 11,12
+
+ bf 30,1f
+ ld 6,0(12)
+ ld 7,8(12)
+ addi 11,12,16
+ mtctr 8
+ std 6,0(3)
+ std 7,8(3)
+ addi 10,3,16
+ bf 31,4f
+ ld 0,16(12)
+ std 0,16(3)
+ blt cr1,3f
+ addi 11,12,24
+ addi 10,3,24
+ b 4f
+ .align 4
+1:
+ mr 10,3
+ mtctr 8
+ bf 31,4f
+ ld 6,0(12)
+ addi 11,12,8
+ std 6,0(3)
+ addi 10,3,8
+
+ .align 4
+4:
+ ld 6,0(11)
+ ld 7,8(11)
+ ld 8,16(11)
+ ld 0,24(11)
+ addi 11,11,32
+2:
+ std 6,0(10)
+ std 7,8(10)
+ std 8,16(10)
+ std 0,24(10)
+ addi 10,10,32
+ bdnz 4b
+3:
+
+ rldicr 0,31,0,60
+ mtcrf 0x01,31
+ beq cr6,0f
+.L9:
+ add 3,3,0
+ add 12,12,0
+
+/* At this point we have a tail of 0-7 bytes and we know that the
+ destination is double word aligned. */
+4: bf 29,2f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: bf 30,1f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ ld 31,-8(1)
+ ld 3,-16(1)
+ blr
+
+/* Copy up to 31 bytes. This is divided into two cases: 0-8 bytes and
+ 9-31 bytes. Each case is handled without loops, using binary (1,2,4,8)
+ tests.
+
+ In the short (0-8 byte) case no attempt is made to force alignment
+ of either source or destination. The hardware will handle the
+ unaligned load/stores with small delays for crossing 32-, 64-, and
+ 4096-byte boundaries. Since these short moves are unlikely to be
+ unaligned or cross these boundaries, the overhead to force
+ alignment is not justified.
+
+ The longer (9-31 byte) move is more likely to cross 32- or 64-byte
+ boundaries. Since only loads are sensitive to the 32-/64-byte
+ boundaries it is more important to align the source than the
+ destination. If the source is not already word aligned, we first
+ move 1-3 bytes as needed. Since we are only word aligned we don't
+ use doubleword load/stores, to ensure that all loads are aligned.
+ While the destination and stores may still be unaligned, this
+ is only an issue for page (4096 byte boundary) crossing, which
+ should be rare for these short moves. The hardware handles this
+ case automatically with a small delay. */
+
+ .align 4
+.L2:
+ mtcrf 0x01,5
+ neg 8,4
+ clrrdi 11,4,2
+ andi. 0,8,3
+ ble cr6,.LE8	/* Handle moves of 0-8 bytes. */
+/* At least 9 bytes left. Get the source word aligned. */
+ cmpldi cr1,5,16
+ mr 10,5
+ mr 12,4
+ cmpldi cr6,0,2
+ beq .L3	/* If the source is already word aligned skip this. */
+/* Copy 1-3 bytes to get source address word aligned. */
+ lwz 6,0(11)
+ subf 10,0,5
+ add 12,4,0
+ blt cr6,5f
+ srdi 7,6,16
+ bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
+ sth 6,0(3)
+#endif
+ b 7f
+ .align 4
+3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
+ stb 7,0(3)
+ sth 6,1(3)
+#endif
+ b 7f
+ .align 4
+5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
+ stb 6,0(3)
+7:
+ cmpldi cr1,10,16
+ add 3,3,0
+ mtcrf 0x01,10
+ .align 4
+.L3:
+/* At least 6 bytes left and the source is word aligned. */
+ blt cr1,8f
+16: /* Move 16 bytes. */
+ lwz 6,0(12)
+ lwz 7,4(12)
+ stw 6,0(3)
+ lwz 6,8(12)
+ stw 7,4(3)
+ lwz 7,12(12)
+ addi 12,12,16
+ stw 6,8(3)
+ stw 7,12(3)
+ addi 3,3,16
+8: /* Move 8 bytes. */
+ bf 28,4f
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 12,12,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+4: /* Move 4 bytes. */
+ bf 29,2f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: /* Move 2-3 bytes. */
+ bf 30,1f
+ lhz 6,0(12)
+ sth 6,0(3)
+ bf 31,0f
+ lbz 7,2(12)
+ stb 7,2(3)
+ ld 3,-16(1)
+ blr
+1: /* Move 1 byte.
*/
+ bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+/* Special case to copy 0-8 bytes. */
+ .align 4
+.LE8:
+ mr 12,4
+ bne cr6,4f
+/* We would have liked to use ld/std here, but the 630 processors are
+ slow for load/store doubles that are not at least word aligned.
+ Unaligned load/store word executes with only a 1 cycle penalty. */
+ lwz 6,0(4)
+ lwz 7,4(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+ .align 4
+4: bf 29,2b
+ lwz 6,0(4)
+ stw 6,0(3)
+6:
+ bf 30,5f
+ lhz 7,4(4)
+ sth 7,4(3)
+ bf 31,0f
+ lbz 8,6(4)
+ stb 8,6(3)
+ ld 3,-16(1)
+ blr
+ .align 4
+5:
+ bf 31,0f
+ lbz 6,4(4)
+ stb 6,4(3)
+ .align 4
+0:
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+ .align 4
+.L6:
+
+ /* Copy doublewords where the destination is aligned but the source is
+ not. Use aligned doubleword loads from the source, shifted to realign
+ the data, to allow aligned destination stores. */
+ addi 11,9,-1	/* loop DW count is one less than total */
+ subf 5,10,12
+ sldi 10,10,3
+ mr 4,3
+ srdi 8,11,2	/* calculate the 32 byte loop count */
+ ld 6,0(5)
+ mtcrf 0x01,11
+ cmpldi cr6,9,4
+ mtctr 8
+ ld 7,8(5)
+ subfic 9,10,64
+ bf 30,1f
+
+ /* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
+ sld 0,6,10
+ srd 8,7,9
+#endif
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+ sld 8,6,9
+#else
+ sld 0,7,10
+ srd 8,6,9
+#endif
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,8f	/* if total DWs = 3, then bypass loop */
+ bf 31,4f
+ /* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
+ sld 0,6,10
+ srd 8,7,9
+#endif
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,8f	/* if total DWs = 4, then bypass loop */
+ b 4f
+ .align 4
+1:
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
+ sld 0,6,10
+ srd 8,7,9
+#endif
+ addi 5,5,16
+ or 0,0,8
+ bf 31,4f
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+4:
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
+ sld 0,6,10
+ srd 8,7,9
+#endif
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+ sld 8,6,9
+#else
+ sld 0,7,10
+ srd 8,6,9
+#endif
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
+ sld 0,6,10
+ srd 8,7,9
+#endif
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+ sld 8,6,9
+#else
+ sld 0,7,10
+ srd 8,6,9
+#endif
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ 4b
+ .align 4
+8:
+ /* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
+ sld 0,6,10
+ srd 8,7,9
+#endif
+ or 0,0,8
+ std 0,0(4)
+3:
+ rldicr 0,31,0,60
+ mtcrf 0x01,31
+ bne cr1,.L9	/* If the tail is 0 bytes we are done! */
+ /* Return original dst pointer. */
+ ld 31,-8(1)
+ ld 3,-16(1)
+ blr
+END_GEN_TB (MEMCPY,TB_TOCLESS)
+libc_hidden_builtin_def (memcpy)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memset.S
new file mode 100644
index 0000000000..a57214e0b0
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memset.S
@@ -0,0 +1,251 @@
+/* Optimized memset implementation for PowerPC64.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+ Returns 's'.
+
+ The memset is done in three sizes: byte (8 bits), word (32 bits),
+ cache line (256 bits). There is a special case for setting cache lines
+ to 0, to take advantage of the dcbz instruction. */
+
+#ifndef MEMSET
+# define MEMSET memset
+#endif
+ .machine power4
+EALIGN (MEMSET, 5, 0)
+ CALL_MCOUNT 3
+
+#define rTMP r0
+#define rRTN r3	/* Initial value of 1st argument. */
+#define rMEMP0 r3	/* Original value of 1st arg. */
+#define rCHR r4	/* Char to set in each byte. */
+#define rLEN r5	/* Length of region to set. */
+#define rMEMP r6	/* Address at which we are storing. */
+#define rALIGN r7	/* Number of bytes we are setting now (when aligning). */
+#define rMEMP2 r8
+
+#define rNEG64 r8	/* Constant -64 for clearing with dcbz. */
+#define rCLS r8	/* Cache line size obtained from static. */
+#define rCLM r9	/* Cache line size mask to check for cache alignment. */
+L(_memset):
+/* Take care of case for size <= 8. */
+ cmpldi cr1, rLEN, 8
+ andi. rALIGN, rMEMP0, 7
+ mr rMEMP, rMEMP0
+ ble- cr1, L(small)
+
+/* Align to doubleword boundary. */
+ cmpldi cr5, rLEN, 31
+ insrdi rCHR, rCHR, 8, 48	/* Replicate byte to halfword. */
+ beq+ L(aligned2)
+ mtcrf 0x01, rMEMP0
+ subfic rALIGN, rALIGN, 8
+ cror 28,30,31	/* Detect odd word aligned. */
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ insrdi rCHR, rCHR, 16, 32	/* Replicate halfword to word. */
+ bt 29, L(g4)
+/* Process the even word of doubleword. */
+ bf+ 31, L(g2)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(g4x)
+L(g2):
+ sth rCHR, -6(rMEMP)
+L(g4x):
+ stw rCHR, -4(rMEMP)
+ b L(aligned)
+/* Process the odd word of doubleword. */
+L(g4):
+ bf 28, L(g4x)	/* If false, word aligned on odd word. */
+ bf+ 31, L(g0)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(aligned)
+L(g0):
+ sth rCHR, -2(rMEMP)
+
+/* Handle the case of size < 31. */
+L(aligned2):
+ insrdi rCHR, rCHR, 16, 32	/* Replicate halfword to word. */
+L(aligned):
+ mtcrf 0x01, rLEN
+ ble cr5, L(medium)
+/* Align to 32-byte boundary. */
+ andi. rALIGN, rMEMP, 0x18
+ subfic rALIGN, rALIGN, 0x20
+ insrdi rCHR, rCHR, 32, 0	/* Replicate word to double word. */
+ beq L(caligned)
+ mtcrf 0x01, rALIGN
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ cmplwi cr1, rALIGN, 0x10
+ mr rMEMP2, rMEMP
+ bf 28, L(a1)
+ stdu rCHR, -8(rMEMP2)
+L(a1): blt cr1, L(a2)
+ std rCHR, -8(rMEMP2)
+ stdu rCHR, -16(rMEMP2)
+L(a2):
+
+/* Now aligned to a 32 byte boundary. */
+L(caligned):
+ cmpldi cr1, rCHR, 0
+ clrrdi. rALIGN, rLEN, 5
+ mtcrf 0x01, rLEN
+ beq cr1, L(zloopstart)	/* Special case for clearing memory using dcbz. */
+L(nondcbz):
+ srdi rTMP, rALIGN, 5
+ mtctr rTMP
+ beq L(medium)	/* We may not actually get to do a full line. */
+ clrldi. rLEN, rLEN, 59
+ add rMEMP, rMEMP, rALIGN
+ li rNEG64, -0x40
+ bdz L(cloopdone)
+
+L(c3): dcbtst rNEG64, rMEMP
+ std rCHR, -8(rMEMP)
+ std rCHR, -16(rMEMP)
+ std rCHR, -24(rMEMP)
+ stdu rCHR, -32(rMEMP)
+ bdnz L(c3)
+L(cloopdone):
+ std rCHR, -8(rMEMP)
+ std rCHR, -16(rMEMP)
+ cmpldi cr1, rLEN, 16
+ std rCHR, -24(rMEMP)
+ stdu rCHR, -32(rMEMP)
+ beqlr
+ add rMEMP, rMEMP, rALIGN
+ b L(medium_tail2)
+
+ .align 5
+/* Clear lines of memory in 128-byte chunks. */
+L(zloopstart):
+/* If the remaining length is less than 32 bytes, don't bother getting
+ the cache line size. */
+ beq L(medium)
+ li rCLS,128	/* cache line size is 128 */
+
+/* Now we know the cache line size, and it is not 32 bytes, but
+ we may not yet be aligned to the cache line. We may have a partial
+ line to fill, so touch it first. */
+ dcbt 0,rMEMP
+L(getCacheAligned):
+ cmpldi cr1,rLEN,32
+ andi. rTMP,rMEMP,127
+ blt cr1,L(handletail32)
+ beq L(cacheAligned)
+ addi rMEMP,rMEMP,32
+ addi rLEN,rLEN,-32
+ std rCHR,-32(rMEMP)
+ std rCHR,-24(rMEMP)
+ std rCHR,-16(rMEMP)
+ std rCHR,-8(rMEMP)
+ b L(getCacheAligned)
+
+/* Now we are aligned to the cache line and can use dcbz. */
+L(cacheAligned):
+ cmpld cr1,rLEN,rCLS
+ blt cr1,L(handletail32)
+ dcbz 0,rMEMP
+ subf rLEN,rCLS,rLEN
+ add rMEMP,rMEMP,rCLS
+ b L(cacheAligned)
+
+/* We are here because the cache line size was set and was not 32 bytes
+ and the remainder (rLEN) is less than the actual cache line size.
+ So set up the preconditions for L(nondcbz) and go there. */
+L(handletail32):
+ clrrwi. rALIGN, rLEN, 5
+ b L(nondcbz)
+
+ .align 5
+L(small):
+/* Memset of 8 bytes or less. */
+ cmpldi cr6, rLEN, 4
+ cmpldi cr5, rLEN, 1
+ ble cr6,L(le4)
+ subi rLEN, rLEN, 4
+ stb rCHR,0(rMEMP)
+ stb rCHR,1(rMEMP)
+ stb rCHR,2(rMEMP)
+ stb rCHR,3(rMEMP)
+ addi rMEMP,rMEMP, 4
+ cmpldi cr5, rLEN, 1
+L(le4):
+ cmpldi cr1, rLEN, 3
+ bltlr cr5
+ stb rCHR, 0(rMEMP)
+ beqlr cr5
+ stb rCHR, 1(rMEMP)
+ bltlr cr1
+ stb rCHR, 2(rMEMP)
+ beqlr cr1
+ stb rCHR, 3(rMEMP)
+ blr
+
+/* Memset of 0-31 bytes. */
+ .align 5
+L(medium):
+ insrdi rCHR, rCHR, 32, 0	/* Replicate word to double word. */
+ cmpldi cr1, rLEN, 16
+L(medium_tail2):
+ add rMEMP, rMEMP, rLEN
+L(medium_tail):
+ bt- 31, L(medium_31t)
+ bt- 30, L(medium_30t)
+L(medium_30f):
+ bt- 29, L(medium_29t)
+L(medium_29f):
+ bge- cr1, L(medium_27t)
+ bflr- 28
+ std rCHR, -8(rMEMP)
+ blr
+
+L(medium_31t):
+ stbu rCHR, -1(rMEMP)
+ bf- 30, L(medium_30f)
+L(medium_30t):
+ sthu rCHR, -2(rMEMP)
+ bf- 29, L(medium_29f)
+L(medium_29t):
+ stwu rCHR, -4(rMEMP)
+ blt- cr1, L(medium_27f)
+L(medium_27t):
+ std rCHR, -8(rMEMP)
+ stdu rCHR, -16(rMEMP)
+L(medium_27f):
+ bflr- 28
+L(medium_28t):
+ std rCHR, -8(rMEMP)
+ blr
+END_GEN_TB (MEMSET,TB_TOCLESS)
+libc_hidden_builtin_def (memset)
+
+/* Copied from bzero.S to prevent the linker from inserting a stub
+   between bzero and memset.
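+
+   In C terms the entry below simply implements
+
+     void __bzero (void *s, size_t n) { memset (s, 0, n); }
+
+   by moving the length into r5, zeroing the fill character in r4,
+   and falling into L(_memset).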
*/
+ENTRY (__bzero)
+ CALL_MCOUNT 3
+ mr r5,r4
+ li r4,0
+ b L(_memset)
+END (__bzero)
+#ifndef __bzero
+weak_alias (__bzero, bzero)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/multiarch/Implies
new file mode 100644
index 0000000000..30edcf7f9d
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/multiarch
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/strncmp.S
new file mode 100644
index 0000000000..2b0c00dfb2
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/strncmp.S
@@ -0,0 +1,225 @@
+/* Optimized strncmp implementation for PowerPC64.
+   Copyright (C) 2003-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#ifndef STRNCMP
+# define STRNCMP strncmp
+#endif
+
+/* See strlen.S for comments on how the end-of-string testing works. */
+
+/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+
+EALIGN (STRNCMP, 4, 0)
+ CALL_MCOUNT 3
+
+#define rTMP2 r0
+#define rRTN r3
+#define rSTR1 r3	/* first string arg */
+#define rSTR2 r4	/* second string arg */
+#define rN r5	/* max string length */
+#define rWORD1 r6	/* current word in s1 */
+#define rWORD2 r7	/* current word in s2 */
+#define rWORD3 r10
+#define rWORD4 r11
+#define rFEFE r8	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F r9	/* constant 0x7f7f7f7f7f7f7f7f */
+#define rNEG r10	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
+#define rBITDIF r11	/* bits that differ in s1 & s2 words */
+#define rTMP r12
+
+ dcbt 0,rSTR1
+ or rTMP, rSTR2, rSTR1
+ lis r7F7F, 0x7f7f
+ dcbt 0,rSTR2
+ clrldi. rTMP, rTMP, 61
+ cmpldi cr1, rN, 0
+ lis rFEFE, -0x101
+ bne L(unaligned)
+/* We are doubleword aligned so set up for two loops: first a double word
+ loop, then fall into the byte loop if there is any residual. */
+ srdi. rTMP, rN, 3
+ clrldi rN, rN, 61
+ addi rFEFE, rFEFE, -0x101
+ addi r7F7F, r7F7F, 0x7f7f
+ cmpldi cr1, rN, 0
+ beq L(unaligned)
+
+ mtctr rTMP	/* Power4 wants mtctr 1st in dispatch group. */
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+ sldi rTMP, rFEFE, 32
+ insrdi r7F7F, r7F7F, 32, 0
+ add rFEFE, rFEFE, rTMP
+ b L(g1)
+
+L(g0):
+ ldu rWORD1, 8(rSTR1)
+ bne- cr1, L(different)
+ ldu rWORD2, 8(rSTR2)
+L(g1): add rTMP, rFEFE, rWORD1
+ nor rNEG, r7F7F, rWORD1
+ bdz L(tail)
+ and. rTMP, rTMP, rNEG
+ cmpd cr1, rWORD1, rWORD2
+ beq+ L(g0)
+
+/* OK. We've hit the end of the string. We need to be careful that
+ we don't compare two strings as different because of gunk beyond
+ the end of the strings...
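+
+   (The doubleword null test above is the classic zero-byte trick:
+   with rFEFE = 0xfefefefefefefeff = -0x0101010101010101 and
+   r7F7F = 0x7f7f7f7f7f7f7f7f, the add/nor/and. sequence computes,
+   as a C sketch,
+
+     ((w + 0xfefefefefefefeffULL) & ~(w | 0x7f7f7f7f7f7f7f7fULL)) != 0
+
+   which is nonzero exactly when some byte of w is zero.)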
*/ + +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + beq cr1, L(equal) + andc rTMP2, rTMP2, rTMP + rldimi rTMP2, rTMP2, 1, 0 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + cmpd cr1, rWORD1, rWORD2 + beq cr1, L(equal) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzd rBITDIF, rBITDIF + cntlzd rNEG, rNEG + addi rNEG, rNEG, 7 + cmpd cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + blt- cr1, L(equal) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. 
rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + bne- L(endstring) + addi rSTR1, rSTR1, 8 + bne- cr1, L(different) + addi rSTR2, rSTR2, 8 + cmpldi cr1, rN, 0 +L(unaligned): + mtctr rN /* Power4 wants mtctr 1st in dispatch group */ + ble cr1, L(ux) +L(uz): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + .align 4 +L(u1): + cmpdi cr1, rWORD1, 0 + bdz L(u4) + cmpd rWORD1, rWORD2 + beq- cr1, L(u4) + bne- L(u4) + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + cmpdi cr1, rWORD3, 0 + bdz L(u3) + cmpd rWORD3, rWORD4 + beq- cr1, L(u3) + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + cmpdi cr1, rWORD1, 0 + bdz L(u4) + cmpd rWORD1, rWORD2 + beq- cr1, L(u4) + bne- L(u4) + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + cmpdi cr1, rWORD3, 0 + bdz L(u3) + cmpd rWORD3, rWORD4 + beq- cr1, L(u3) + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + b L(u1) + +L(u3): sub rRTN, rWORD3, rWORD4 + blr +L(u4): sub rRTN, rWORD1, rWORD2 + blr +L(ux): + li rRTN, 0 + blr +END (STRNCMP) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/Implies new file mode 100644 index 0000000000..565bc94471 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/Implies @@ -0,0 +1,4 @@ +powerpc/power5+/fpu +powerpc/power5+ +powerpc/powerpc64/power5/fpu +powerpc/powerpc64/power5 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/Implies new file mode 100644 index 0000000000..f00c50fb49 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/multiarch/Implies new file mode 100644 index 0000000000..c0e67848e2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S new file mode 100644 index 0000000000..39b7ee78e5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S @@ -0,0 +1,37 @@ +/* ceil function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__ceil, 4, 0)
+ CALL_MCOUNT 0
+ frip fp1, fp1
+ blr
+ END (__ceil)
+
+weak_alias (__ceil, ceil)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__ceil, ceill)
+strong_alias (__ceil, __ceill)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __ceil, ceill, GLIBC_2_0)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S
new file mode 100644
index 0000000000..d1c6f26d6d
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S
@@ -0,0 +1,30 @@
+/* ceilf function.  PowerPC64/power5+ version.
+   Copyright (C) 2006-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__ceilf, 4, 0)
+ CALL_MCOUNT 0
+ frip fp1, fp1	/* The rounding instructions are double. */
+ frsp fp1, fp1	/* But we need to set overflow for float. */
+ blr
+ END (__ceilf)
+
+weak_alias (__ceilf, ceilf)
+
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S
new file mode 100644
index 0000000000..6411f15633
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S
@@ -0,0 +1,37 @@
+/* floor function.  PowerPC64/power5+ version.
+   Copyright (C) 2006-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__floor, 4, 0)
+ CALL_MCOUNT 0
+ frim fp1, fp1
+ blr
+ END (__floor)
+
+weak_alias (__floor, floor)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__floor, floorl)
+strong_alias (__floor, __floorl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __floor, floorl, GLIBC_2_0)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S
new file mode 100644
index 0000000000..26c3b2594b
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S
@@ -0,0 +1,30 @@
+/* floorf function.  PowerPC64/power5+ version.
+   Copyright (C) 2006-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__floorf, 4, 0)
+ CALL_MCOUNT 0
+ frim fp1, fp1	/* The rounding instructions are double. */
+ frsp fp1, fp1	/* But we need to set overflow for float. */
+ blr
+ END (__floorf)
+
+weak_alias (__floorf, floorf)
+
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S
new file mode 100644
index 0000000000..909714b449
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S
@@ -0,0 +1,58 @@
+/* llround function.  POWER5+, PowerPC64 version.
+   Copyright (C) 2006-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long [r3] llround (float x [fp1])
+ IEEE 1003.1 llround function. IEEE specifies "round to the nearest
+ integer value, rounding halfway cases away from zero, regardless of
+ the current rounding mode." However PowerPC Architecture defines
+ "Round to Nearest" as "Choose the best approximation. In case of a
+ tie, choose the one that is even (least significant bit 0).".
+ So we pre-round using the V2.02 Floating Round to Integer Nearest
+ instruction before we use the Floating Convert to Integer Doubleword
+ with round toward zero instruction. */
+
+ .machine "power5"
+EALIGN (__llround, 4, 0)
+ CALL_MCOUNT 0
+ frin fp2, fp1	/* Round to nearest +-0.5. */
+ fctidz fp3, fp2	/* Convert To Integer DW round toward 0. */
+ stfd fp3, -16(r1)
+ nop	/* Ensure the following load is in a different dispatch group */
+ nop	/* to avoid pipe stall on POWER4&5. */
+ nop
+ ld r3, -16(r1)
+ blr
+ END (__llround)
+
+strong_alias (__llround, __lround)
+weak_alias (__llround, llround)
+weak_alias (__lround, lround)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S
new file mode 100644
index 0000000000..dc46d20f4f
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S
@@ -0,0 +1,37 @@
+/* round function.  PowerPC64/power5+ version.
+   Copyright (C) 2006-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__round, 4, 0)
+ CALL_MCOUNT 0
+ frin fp1, fp1
+ blr
+ END (__round)
+
+weak_alias (__round, round)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__round, roundl)
+strong_alias (__round, __roundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __round, roundl, GLIBC_2_1)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S
new file mode 100644
index 0000000000..0a587843ad
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S
@@ -0,0 +1,30 @@
+/* roundf function.  PowerPC64/power5+ version.
+   Copyright (C) 2006-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__roundf, 4, 0)
+ CALL_MCOUNT 0
+ frin fp1, fp1	/* The rounding instructions are double. */
+ frsp fp1, fp1	/* But we need to set overflow for float.
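+			(In C terms this pair behaves roughly like
+			(float) round ((double) x): frin rounds to
+			nearest with ties away from zero, and frsp
+			then rounds the result to single precision.)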
 */
+ blr
+ END (__roundf)
+
+weak_alias (__roundf, roundf)
+
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S
new file mode 100644
index 0000000000..7f8290e408
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S
@@ -0,0 +1,37 @@
+/* trunc function. PowerPC64/power5+ version.
+ Copyright (C) 2006-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__trunc, 4, 0)
+ CALL_MCOUNT 0
+ friz fp1, fp1
+ blr
+ END (__trunc)
+
+weak_alias (__trunc, trunc)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__trunc, truncl)
+strong_alias (__trunc, __truncl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __trunc, truncl, GLIBC_2_1)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S
new file mode 100644
index 0000000000..07f5d33127
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S
@@ -0,0 +1,30 @@
+/* truncf function. PowerPC64/power5+ version.
+ Copyright (C) 2006-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__truncf, 4, 0)
+ CALL_MCOUNT 0
+ friz fp1, fp1 /* The rounding instructions are double. */
+ frsp fp1, fp1 /* But we need to set overflow for float. */
+ blr
+ END (__truncf)
+
+weak_alias (__truncf, truncf)
+
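Taken together, the power5+ kernels above are each a single hardware
round-to-integer instruction; the float variants add a trailing frsp because
the round instructions operate on doubles. In C terms (a sketch only; the
*_sketch names are illustrative, not glibc symbols):

    #include <math.h>

    double floor_sketch (double x) { return floor (x); }  /* frim */
    double round_sketch (double x) { return round (x); }  /* frin */
    double trunc_sketch (double x) { return trunc (x); }  /* friz */

    /* Float variant: run the same double-precision operation, then squeeze
       the already-integral result back to single precision (the frsp step). */
    float truncf_sketch (float x) { return (float) trunc ((double) x); }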
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/multiarch/Implies
new file mode 100644
index 0000000000..0851b19fa2
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power5/multiarch
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/Implies
new file mode 100644
index 0000000000..bedb20b65c
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc64/power4/fpu
+powerpc/powerpc64/power4
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/Implies
new file mode 100644
index 0000000000..6b8c23efa6
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu/
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/multiarch/Implies
new file mode 100644
index 0000000000..3740d050a6
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu/multiarch
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S
new file mode 100644
index 0000000000..d6a829ea37
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S
@@ -0,0 +1,60 @@
+/* isnan(). PowerPC64 version.
+ Copyright (C) 2008-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isnan(x) */
+ .machine power5
+EALIGN (__isnan, 4, 0)
+ CALL_MCOUNT 0
+ stfd fp1,-8(r1) /* copy FPR to GPR */
+ lis r0,0x7ff0
+ nop /* ensure the following is in a different */
+ nop /* dispatch group */
+ ld r4,-8(r1)
+ sldi r0,r0,32 /* const long r0 0x7ff00000 00000000 */
+ clrldi r4,r4,1 /* x = fabs(x) */
+ cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */
+ li r3,0 /* then return 0 */
+ blelr+ cr7
+ li r3,1 /* else return 1 */
+ blr
+ END (__isnan)
+
+hidden_def (__isnan)
+weak_alias (__isnan, isnan)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__isnan, __isnanf)
+hidden_def (__isnanf)
+weak_alias (__isnanf, isnanf)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isnan, __isnanl)
+weak_alias (__isnan, isnanl)
+#endif
+
+#if !IS_IN (libm)
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0);
+compat_symbol (libc, isnan, isnanl, GLIBC_2_0);
+# endif
+#endif
+
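The power5 isnan above is a pure integer test: move the double's bits into a
GPR, clear the sign bit, and compare against the +infinity encoding. A C
sketch of the same idea (isnan_sketch is a hypothetical name):

    #include <stdint.h>
    #include <string.h>

    int
    isnan_sketch (double x)
    {
      uint64_t bits;
      memcpy (&bits, &x, sizeof bits);        /* the stfd/ld round trip */
      bits &= UINT64_C (0x7fffffffffffffff);  /* clrldi: x = fabs(x) */
      /* NaNs are exactly the encodings strictly above +inf.  */
      return bits > UINT64_C (0x7ff0000000000000);
    }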
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/multiarch/Implies
new file mode 100644
index 0000000000..9a3cbb0938
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/multiarch
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/Implies
new file mode 100644
index 0000000000..4c782d4122
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc64/power5+/fpu
+powerpc/powerpc64/power5+
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/Implies
new file mode 100644
index 0000000000..f09854edb6
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power5+/fpu
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/multiarch/Implies
new file mode 100644
index 0000000000..fca8a4ef0f
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power5+/fpu/multiarch
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysign.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysign.S
new file mode 100644
index 0000000000..ec36d1be5b
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysign.S
@@ -0,0 +1,58 @@
+/* copysign(). PowerPC64/POWER6 version.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* double [f1] copysign (double [f1] x, double [f2] y);
+ copysign(x,y) returns a value with the magnitude of x and
+ with the sign bit of y. */
+
+ .section ".text"
+ .type __copysign, @function
+ .machine power6
+EALIGN (__copysign, 4, 0)
+ CALL_MCOUNT 0
+ fcpsgn fp1,fp2,fp1
+ blr
+END (__copysign)
+
+hidden_def (__copysign)
+weak_alias (__copysign, copysign)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision.
*/ +strong_alias (__copysign, __copysignf) +hidden_def (__copysignf) +weak_alias (__copysignf, copysignf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__copysign, __copysignl) +weak_alias (__copysign, copysignl) +#endif + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, copysign, copysignl, GLIBC_2_0) +# endif +#else +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, copysign, copysignl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysignf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysignf.S new file mode 100644 index 0000000000..d4aa702d07 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysignf.S @@ -0,0 +1 @@ +/* This function uses the same code as s_copysign.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S new file mode 100644 index 0000000000..85187b45f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S @@ -0,0 +1,59 @@ +/* isnan(). PowerPC64 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power6 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + stfd fp1,-8(r1) /* copy FPR to GPR */ + ori r1,r1,0 + ld r4,-8(r1) + lis r0,0x7ff0 + sldi r0,r0,32 /* const long r0 0x7ff00000 00000000 */ + clrldi r4,r4,1 /* x = fabs(x) */ + cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */ + li r3,0 /* then return 0 */ + blelr+ cr7 + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memcpy.S new file mode 100644 index 0000000000..1f7294b8ed --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memcpy.S @@ -0,0 +1,1499 @@ +/* Optimized memcpy implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'.
+
+ Memcpy handles short copies (< 32 bytes) using binary move blocks
+ (no loops) of lwz/stw. The tail (remaining 1-3 bytes) is handled
+ with the appropriate combination of byte and halfword load/stores.
+ There is minimal effort to optimize the alignment of short moves.
+ The 64-bit implementations of POWER3 and POWER4 do a reasonable job
+ of handling unaligned load/stores that do not cross 32-byte boundaries.
+
+ Longer moves (>= 32 bytes) justify the effort to get at least the
+ destination doubleword (8-byte) aligned. Further optimization is
+ possible when both source and destination are doubleword aligned.
+ Each case has an optimized unrolled loop.
+
+ For POWER6 unaligned loads will take a 20+ cycle hiccup for any
+ L1 cache miss that crosses a 32- or 128-byte boundary. Store
+ is more forgiving and does not take a hiccup until page or
+ segment boundaries. So we require doubleword alignment for
+ the source but may take a risk and only require word alignment
+ for the destination. */
+
+#ifndef MEMCPY
+# define MEMCPY memcpy
+#endif
+ .machine "power6"
+EALIGN (MEMCPY, 7, 0)
+ CALL_MCOUNT 3
+
+ cmpldi cr1,5,31
+ neg 0,3
+ std 3,-16(1)
+ std 31,-8(1)
+ andi. 11,3,7 /* check alignment of dst. */
+ clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */
+ clrldi 10,4,61 /* check alignment of src. */
+ cmpldi cr6,5,8
+ ble- cr1,.L2 /* If move < 32 bytes use short move code. */
+ mtcrf 0x01,0
+ cmpld cr6,10,11
+ srdi 9,5,3 /* Number of full double words remaining. */
+ beq .L0
+
+ subf 5,0,5
+ /* Move 0-7 bytes as needed to get the destination doubleword aligned.
+ Duplicate some code to maximize fall-through and minimize agen delays. */
+1: bf 31,2f
+ lbz 6,0(4)
+ stb 6,0(3)
+ bf 30,5f
+ lhz 6,1(4)
+ sth 6,1(3)
+ bf 29,0f
+ lwz 6,3(4)
+ stw 6,3(3)
+ b 0f
+5:
+ bf 29,0f
+ lwz 6,1(4)
+ stw 6,1(3)
+ b 0f
+
+2: bf 30,4f
+ lhz 6,0(4)
+ sth 6,0(3)
+ bf 29,0f
+ lwz 6,2(4)
+ stw 6,2(3)
+ b 0f
+
+4: bf 29,0f
+ lwz 6,0(4)
+ stw 6,0(3)
+0:
+/* Add the number of bytes until the 1st doubleword of dst to src and dst. */
+ add 4,4,0
+ add 3,3,0
+
+ clrldi 10,4,61 /* check alignment of src again. */
+ srdi 9,5,3 /* Number of full double words remaining. */
+
+ /* Copy doublewords from source to destination, assuming the
+ destination is aligned on a doubleword boundary.
+
+ At this point we know there are at least 25 bytes left (32-7) to copy.
+ The next step is to determine if the source is also doubleword aligned.
+ If not, branch to the unaligned move code at .L6, which uses
+ a load, shift, store strategy.
+
+ Otherwise source and destination are doubleword aligned, and we can
+ use the optimized doubleword copy loop. */
+ .align 4
+.L0:
+ clrldi 11,5,61
+ andi. 0,5,0x78
+ srdi 12,5,7 /* Number of 128-byte blocks to move. */
+ cmpldi cr1,11,0 /* If the tail is 0 bytes */
+ bne- cr6,.L6 /* If source is not DW aligned. */
+
+ /* Move doublewords where destination and source are DW aligned.
+ Use an unrolled loop to copy 16 doublewords (128 bytes) per iteration.
+ If the copy is not an exact multiple of 128 bytes, 1-15
+ doublewords are copied as needed to set up the main loop. After
+ the main loop exits there may be a tail of 1-7 bytes. These bytes
+ are copied a word/halfword/byte at a time as needed to preserve
+ alignment.
+
+ For POWER6 the L1 is store-through and the L2 is store-in. The
+ L2 is clocked at half CPU clock so we can store 16 bytes every
+ other cycle. POWER6 also has a load/store bypass so we can do
+ load, load, store, store every 2 cycles.
+
+ The following code is sensitive to cache line alignment. Do not
+ make any changes without first making sure they don't result in
+ splitting ld/std pairs across a cache line. */
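A sketch of what one iteration of the aligned loop below does, in C, ignoring
the dispatch-group scheduling the assembly is really about (names are
illustrative, not glibc symbols):

    #include <stddef.h>
    #include <stdint.h>

    /* Copy BLOCKS 128-byte blocks as 16 doublewords each, pairing loads
       and stores the way L(das_loop) pairs ld/ld with std/std.  */
    static void
    das_loop_sketch (uint64_t *dst, const uint64_t *src, size_t blocks)
    {
      while (blocks--)
        {
          for (int i = 0; i < 16; i += 2)
            {
              uint64_t a = src[i];      /* load, load ...  */
              uint64_t b = src[i + 1];
              dst[i] = a;               /* ... store, store  */
              dst[i + 1] = b;
            }
          src += 16;
          dst += 16;
        }
    }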
+
+ mtcrf 0x02,5
+ mtcrf 0x01,5
+ cmpldi cr5,12,1
+ beq L(das_loop)
+
+ bf 25,4f
+ .align 3
+ ld 6,0(4)
+ ld 7,8(4)
+ mr 11,4
+ mr 10,3
+ std 6,0(3)
+ std 7,8(3)
+ ld 6,16(4)
+ ld 7,24(4)
+ std 6,16(3)
+ std 7,24(3)
+ ld 6,0+32(4)
+ ld 7,8+32(4)
+ addi 4,4,64
+ addi 3,3,64
+ std 6,0+32(10)
+ std 7,8+32(10)
+ ld 6,16+32(11)
+ ld 7,24+32(11)
+ std 6,16+32(10)
+ std 7,24+32(10)
+4:
+ mr 10,3
+ bf 26,2f
+ ld 6,0(4)
+ ld 7,8(4)
+ mr 11,4
+ nop
+ std 6,0(3)
+ std 7,8(3)
+ ld 6,16(4)
+ ld 7,24(4)
+ addi 4,4,32
+ std 6,16(3)
+ std 7,24(3)
+ addi 3,3,32
+6:
+ nop
+ bf 27,5f
+ ld 6,0+32(11)
+ ld 7,8+32(11)
+ addi 4,4,16
+ addi 3,3,16
+ std 6,0+32(10)
+ std 7,8+32(10)
+ bf 28,L(das_loop_s)
+ ld 0,16+32(11)
+ addi 4,4,8
+ addi 3,3,8
+ std 0,16+32(10)
+ blt cr5,L(das_tail)
+ b L(das_loop)
+ .align 3
+5:
+ nop
+ bf 28,L(das_loop_s)
+ ld 6,32(11)
+ addi 4,4,8
+ addi 3,3,8
+ std 6,32(10)
+ blt cr5,L(das_tail)
+ b L(das_loop)
+ .align 3
+2:
+ mr 11,4
+ bf 27,1f
+ ld 6,0(4)
+ ld 7,8(4)
+ addi 4,4,16
+ addi 3,3,16
+ std 6,0(10)
+ std 7,8(10)
+ bf 28,L(das_loop_s)
+ ld 0,16(11)
+ addi 4,11,24
+ addi 3,10,24
+ std 0,16(10)
+ blt cr5,L(das_tail)
+ b L(das_loop)
+ .align 3
+1:
+ nop
+ bf 28,L(das_loop_s)
+ ld 6,0(4)
+ addi 4,4,8
+ addi 3,3,8
+ std 6,0(10)
+L(das_loop_s):
+ nop
+ blt cr5,L(das_tail)
+ .align 4
+L(das_loop):
+ ld 6,0(4)
+ ld 7,8(4)
+ mr 10,3
+ mr 11,4
+ std 6,0(3)
+ std 7,8(3)
+ addi 12,12,-1
+ nop
+ ld 8,16(4)
+ ld 0,24(4)
+ std 8,16(3)
+ std 0,24(3)
+
+ ld 6,0+32(4)
+ ld 7,8+32(4)
+ std 6,0+32(3)
+ std 7,8+32(3)
+ ld 8,16+32(4)
+ ld 0,24+32(4)
+ std 8,16+32(3)
+ std 0,24+32(3)
+
+ ld 6,0+64(11)
+ ld 7,8+64(11)
+ std 6,0+64(10)
+ std 7,8+64(10)
+ ld 8,16+64(11)
+ ld 0,24+64(11)
+ std 8,16+64(10)
+ std 0,24+64(10)
+
+ ld 6,0+96(11)
+ ld 7,8+96(11)
+ addi 4,4,128
+ addi 3,3,128
+ std 6,0+96(10)
+ std 7,8+96(10)
+ ld 8,16+96(11)
+ ld 0,24+96(11)
+ std 8,16+96(10)
+ std 0,24+96(10)
+ ble cr5,L(das_loop_e)
+
+ mtctr 12
+ .align 4
+L(das_loop2):
+ ld 6,0(4)
+ ld 7,8(4)
+ mr 10,3
+ mr 11,4
+ std 6,0(3)
+ std 7,8(3)
+ ld 8,16(4)
+ ld 0,24(4)
+ std 8,16(3)
+ std 0,24(3)
+
+ ld 6,0+32(4)
+ ld 7,8+32(4)
+ std 6,0+32(3)
+ std 7,8+32(3)
+ ld 8,16+32(4)
+ ld 0,24+32(4)
+ std 8,16+32(3)
+ std 0,24+32(3)
+
+ ld 6,0+64(11)
+ ld 7,8+64(11)
+ std 6,0+64(10)
+ std 7,8+64(10)
+ ld 8,16+64(11)
+ ld 0,24+64(11)
+ std 8,16+64(10)
+ std 0,24+64(10)
+
+ ld 6,0+96(11)
+ ld 7,8+96(11)
+ addi 4,4,128
+ addi 3,3,128
+ std 6,0+96(10)
+ std 7,8+96(10)
+ ld 8,16+96(11)
+ ld 0,24+96(11)
+ std 8,16+96(10)
+ std 0,24+96(10)
+ bdnz L(das_loop2)
+L(das_loop_e):
+/* Check for a 1-7 byte tail; return if none. */
+ bne cr1,L(das_tail2)
+/* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(das_tail):
+ beq cr1,0f
+
+L(das_tail2):
+/* At this point we have a tail of 0-7 bytes and we know that the
+ destination is double word aligned.
 */
+4: bf 29,2f
+ lwz 6,0(4)
+ stw 6,0(3)
+ bf 30,5f
+ lhz 6,4(4)
+ sth 6,4(3)
+ bf 31,0f
+ lbz 6,6(4)
+ stb 6,6(3)
+ b 0f
+5: bf 31,0f
+ lbz 6,4(4)
+ stb 6,4(3)
+ b 0f
+
+2: bf 30,1f
+ lhz 6,0(4)
+ sth 6,0(3)
+ bf 31,0f
+ lbz 6,2(4)
+ stb 6,2(3)
+ b 0f
+
+1: bf 31,0f
+ lbz 6,0(4)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+/* Copy up to 31 bytes. This is divided into two cases: 0-8 bytes and 9-31
+ bytes. Each case is handled without loops, using binary (1,2,4,8)
+ tests.
+
+ In the short (0-8 byte) case no attempt is made to force alignment
+ of either source or destination. The hardware will handle the
+ unaligned load/stores with small delays for crossing 32-, 128-,
+ and 4096-byte boundaries. Since these short moves are unlikely to be
+ unaligned or cross these boundaries, the overhead to force
+ alignment is not justified.
+
+ The longer (9-31 byte) move is more likely to cross 32- or 128-byte
+ boundaries. Since only loads are sensitive to the 32-/128-byte
+ boundaries it is more important to align the source than the
+ destination. If the source is not already word aligned, we first
+ move 1-3 bytes as needed. Since we are only word aligned we use
+ word (not doubleword) load/stores, to ensure that all loads are
+ aligned. While the destination and stores may still be unaligned,
+ this is only an issue for page (4096 byte boundary) crossing, which
+ should be rare for these short moves. The hardware handles this
+ case automatically with a small (~20 cycle) delay. */
+ .align 4
+.L2:
+ mtcrf 0x01,5
+ neg 8,4
+ clrrdi 11,4,2
+ andi. 0,8,3
+ ble cr6,.LE8 /* Handle moves of 0-8 bytes. */
+/* At least 9 bytes left. Get the source word aligned. */
+ cmpldi cr1,5,16
+ mr 10,5
+ mr 12,4
+ cmpldi cr6,0,2
+ beq L(dus_tail) /* If the source is already word aligned skip this. */
+/* Copy 1-3 bytes to get source address word aligned. */
+ lwz 6,0(11)
+ subf 10,0,5
+ add 12,4,0
+ blt cr6,5f
+ srdi 7,6,16
+ bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
+ sth 6,0(3)
+#endif
+ b 7f
+ .align 4
+3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
+ stb 7,0(3)
+ sth 6,1(3)
+#endif
+ b 7f
+ .align 4
+5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
+ stb 6,0(3)
+7:
+ cmpldi cr1,10,16
+ add 3,3,0
+ mtcrf 0x01,10
+ .align 4
+L(dus_tail):
+/* At least 6 bytes left and the source is word aligned. This allows
+ some speculative loads up front. */
+/* We need to special case the fall-through because the biggest delays
+ are due to address computation not being ready in time for the
+ AGEN. */
+ lwz 6,0(12)
+ lwz 7,4(12)
+ blt cr1,L(dus_tail8)
+ cmpldi cr0,10,24
+L(dus_tail16): /* Move 16 bytes. */
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,8(12)
+ lwz 7,12(12)
+ stw 6,8(3)
+ stw 7,12(3)
+/* Move 8 bytes more. */
+ bf 28,L(dus_tail16p8)
+ cmpldi cr1,10,28
+ lwz 6,16(12)
+ lwz 7,20(12)
+ stw 6,16(3)
+ stw 7,20(3)
+/* Move 4 bytes more. */
+ bf 29,L(dus_tail16p4)
+ lwz 6,24(12)
+ stw 6,24(3)
+ addi 12,12,28
+ addi 3,3,28
+ bgt cr1,L(dus_tail2)
+ /* exactly 28 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_tail16p8): /* less than 8 bytes left. */
+ beq cr1,L(dus_tailX) /* exactly 16 bytes, early exit. */
+ cmpldi cr1,10,20
+ bf 29,L(dus_tail16p2)
+/* Move 4 bytes more. */
+ lwz 6,16(12)
+ stw 6,16(3)
+ addi 12,12,20
+ addi 3,3,20
+ bgt cr1,L(dus_tail2)
+ /* exactly 20 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_tail16p4): /* less than 4 bytes left. */
+ addi 12,12,24
+ addi 3,3,24
+ bgt cr0,L(dus_tail2)
+ /* exactly 24 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_tail16p2): /* 16 bytes moved, less than 4 bytes left. */
+ addi 12,12,16
+ addi 3,3,16
+ b L(dus_tail2)
+
+ .align 4
+L(dus_tail8): /* Move 8 bytes. */
+/* r6, r7 already loaded speculatively. */
+ cmpldi cr1,10,8
+ cmpldi cr0,10,12
+ bf 28,L(dus_tail4)
+ .align 2
+ stw 6,0(3)
+ stw 7,4(3)
+/* Move 4 bytes more. */
+ bf 29,L(dus_tail8p4)
+ lwz 6,8(12)
+ stw 6,8(3)
+ addi 12,12,12
+ addi 3,3,12
+ bgt cr0,L(dus_tail2)
+ /* exactly 12 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_tail8p4): /* less than 4 bytes left. */
+ addi 12,12,8
+ addi 3,3,8
+ bgt cr1,L(dus_tail2)
+ /* exactly 8 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+
+ .align 4
+L(dus_tail4): /* Move 4 bytes. */
+/* r6 already loaded speculatively. If we are here we know there are
+ more than 4 bytes left. So there is no need to test. */
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+L(dus_tail2): /* Move 2-3 bytes. */
+ bf 30,L(dus_tail1)
+ lhz 6,0(12)
+ sth 6,0(3)
+ bf 31,L(dus_tailX)
+ lbz 7,2(12)
+ stb 7,2(3)
+ ld 3,-16(1)
+ blr
+L(dus_tail1): /* Move 1 byte. */
+ bf 31,L(dus_tailX)
+ lbz 6,0(12)
+ stb 6,0(3)
+L(dus_tailX):
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+/* Special case to copy 0-8 bytes. */
+ .align 4
+.LE8:
+ mr 12,4
+ bne cr6,L(dus_4)
+/* Exactly 8 bytes. We may cross a 32-/128-byte boundary and take a ~20
+ cycle delay. This case should be rare and any attempt to avoid this
+ would take most of 20 cycles anyway. */
+ ld 6,0(4)
+ std 6,0(3)
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_4):
+ bf 29,L(dus_tail2)
+ lwz 6,0(4)
+ stw 6,0(3)
+ bf 30,L(dus_5)
+ lhz 7,4(4)
+ sth 7,4(3)
+ bf 31,L(dus_0)
+ lbz 8,6(4)
+ stb 8,6(3)
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_5):
+ bf 31,L(dus_0)
+ lbz 6,4(4)
+ stb 6,4(3)
+L(dus_0):
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
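The .L6 path below never issues an unaligned load: it reads the two aligned
doublewords straddling each destination doubleword and splices them with a
shift pair. A big-endian C sketch of the splice (little-endian swaps the
shift directions, as the #ifdefs below do); names are illustrative and SH is
assumed to be 1-7, as it is whenever this path is taken:

    #include <stddef.h>
    #include <stdint.h>

    static void
    shift_merge_sketch (uint64_t *dst, const unsigned char *src, size_t ndw)
    {
      unsigned sh = (uintptr_t) src & 7;                  /* 1-7 by assumption */
      const uint64_t *s = (const uint64_t *) (src - sh);  /* align down */
      uint64_t prev = *s++;                               /* pre load 1st DW */
      while (ndw--)
        {
          uint64_t next = *s++;  /* aligned loads never cross a page */
          /* sldi/srdi pair: high bytes of PREV joined to low bytes of NEXT. */
          *dst++ = (prev << (8 * sh)) | (next >> (64 - 8 * sh));
          prev = next;
        }
    }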
+ .align 4
+.L6:
+ cfi_offset(31,-8)
+ mr 12,4
+ mr 31,5
+ /* Copy doublewords where the destination is aligned but the source is
+ not. Use aligned doubleword loads from the source, shifted to realign
+ the data, to allow aligned destination stores. */
+ addi 11,9,-1 /* loop DW count is one less than total */
+ subf 5,10,12 /* Move source addr to previous full double word. */
+ cmpldi cr5, 10, 2
+ cmpldi cr0, 10, 4
+ mr 4,3
+ srdi 8,11,2 /* calculate the 32 byte loop count */
+ ld 6,0(5) /* pre load 1st full doubleword. */
+ mtcrf 0x01,11
+ cmpldi cr6,9,4
+ mtctr 8
+ ld 7,8(5) /* pre load 2nd full doubleword.
*/ + bge cr0, L(du4_do) + blt cr5, L(du1_do) + beq cr5, L(du2_do) + b L(du3_do) + + .align 4 +L(du1_do): + bf 30,L(du1_1dw) + + /* there are at least two DWs to copy */ + /* FIXME: can combine last shift and "or" into "rldimi" */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 8 + sldi 8,6, 64-8 +#else + sldi 0,7, 8 + srdi 8,6, 64-8 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du1_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du1_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du1_fini) /* if total DWs = 4, then bypass loop */ + b L(du1_loop) + .align 4 +L(du1_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du1_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du1_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 8 + sldi 8,6, 64-8 +#else + sldi 0,7, 8 + srdi 8,6, 64-8 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 8 + sldi 8,6, 64-8 +#else + sldi 0,7, 8 + srdi 8,6, 64-8 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du1_loop) + .align 4 +L(du1_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du2_do): + bf 30,L(du2_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 16 + sldi 8,6, 64-16 +#else + sldi 0,7, 16 + srdi 8,6, 64-16 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du2_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du2_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du2_fini) /* if total DWs = 4, then bypass loop */ + b L(du2_loop) + .align 4 +L(du2_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du2_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du2_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 16 + sldi 8,6, 64-16 +#else + sldi 0,7, 16 + srdi 8,6, 64-16 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 
0,7, 16 + sldi 8,6, 64-16 +#else + sldi 0,7, 16 + srdi 8,6, 64-16 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du2_loop) + .align 4 +L(du2_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du3_do): + bf 30,L(du3_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 24 + sldi 8,6, 64-24 +#else + sldi 0,7, 24 + srdi 8,6, 64-24 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du3_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du3_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du3_fini) /* if total DWs = 4, then bypass loop */ + b L(du3_loop) + .align 4 +L(du3_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du3_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du3_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 24 + sldi 8,6, 64-24 +#else + sldi 0,7, 24 + srdi 8,6, 64-24 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 24 + sldi 8,6, 64-24 +#else + sldi 0,7, 24 + srdi 8,6, 64-24 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du3_loop) + .align 4 +L(du3_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du4_do): + cmpldi cr5, 10, 6 + beq cr0, L(du4_dox) + blt cr5, L(du5_do) + beq cr5, L(du6_do) + b L(du7_do) +L(du4_dox): + bf 30,L(du4_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 32 + sldi 8,6, 64-32 +#else + sldi 0,7, 32 + srdi 8,6, 64-32 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du4_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du4_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du4_fini) /* if total DWs = 4, then bypass loop */ + b L(du4_loop) + .align 4 +L(du4_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du4_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du4_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 
8,7, 64-32 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 32 + sldi 8,6, 64-32 +#else + sldi 0,7, 32 + srdi 8,6, 64-32 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 32 + sldi 8,6, 64-32 +#else + sldi 0,7, 32 + srdi 8,6, 64-32 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du4_loop) + .align 4 +L(du4_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du5_do): + bf 30,L(du5_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 40 + sldi 8,6, 64-40 +#else + sldi 0,7, 40 + srdi 8,6, 64-40 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du5_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du5_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du5_fini) /* if total DWs = 4, then bypass loop */ + b L(du5_loop) + .align 4 +L(du5_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du5_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du5_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 40 + sldi 8,6, 64-40 +#else + sldi 0,7, 40 + srdi 8,6, 64-40 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 40 + sldi 8,6, 64-40 +#else + sldi 0,7, 40 + srdi 8,6, 64-40 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du5_loop) + .align 4 +L(du5_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du6_do): + bf 30,L(du6_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 48 + sldi 8,6, 64-48 +#else + sldi 0,7, 48 + srdi 8,6, 64-48 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du6_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du6_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du6_fini) /* if total DWs = 4, then bypass loop */ + b L(du6_loop) + .align 4 +L(du6_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 
64-48 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du6_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du6_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 48 + sldi 8,6, 64-48 +#else + sldi 0,7, 48 + srdi 8,6, 64-48 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 48 + sldi 8,6, 64-48 +#else + sldi 0,7, 48 + srdi 8,6, 64-48 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du6_loop) + .align 4 +L(du6_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du7_do): + bf 30,L(du7_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 56 + sldi 8,6, 64-56 +#else + sldi 0,7, 56 + srdi 8,6, 64-56 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du7_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du7_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du7_fini) /* if total DWs = 4, then bypass loop */ + b L(du7_loop) + .align 4 +L(du7_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du7_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du7_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 56 + sldi 8,6, 64-56 +#else + sldi 0,7, 56 + srdi 8,6, 64-56 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 56 + sldi 8,6, 64-56 +#else + sldi 0,7, 56 + srdi 8,6, 64-56 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du7_loop) + .align 4 +L(du7_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du_done): + rldicr 0,31,0,60 + mtcrf 0x01,31 + beq cr1,0f /* If the tail is 0 bytes we are done! */ + + add 3,3,0 + add 12,12,0 +/* At this point we have a tail of 0-7 bytes and we know that the + destination is double word aligned. */ +4: bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: bf 30,1f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. 
 */
+ ld 31,-8(1)
+ ld 3,-16(1)
+ blr
+END_GEN_TB (MEMCPY,TB_TOCLESS)
+libc_hidden_builtin_def (memcpy)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memset.S
new file mode 100644
index 0000000000..aee1c8eabb
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memset.S
@@ -0,0 +1,395 @@
+/* Optimized 64-bit memset implementation for POWER6.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+ Returns 's'.
+
+ The memset is done in three sizes: byte (8 bits), word (32 bits),
+ cache line (256 bits). There is a special case for setting cache lines
+ to 0, to take advantage of the dcbz instruction. */
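The overall shape of the routine, minus the cache-line machinery, in C (a
sketch only; memset_sketch is not a glibc function):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void *
    memset_sketch (void *s, int c, size_t n)
    {
      unsigned char *p = s;
      uint64_t pat = (unsigned char) c;                    /* the insrdi chain: */
      pat |= pat << 8; pat |= pat << 16; pat |= pat << 32; /* replicate byte */
      while (n > 0 && ((uintptr_t) p & 7) != 0)            /* align to DW */
        { *p++ = (unsigned char) c; n--; }
      for (; n >= 8; n -= 8, p += 8)                       /* bulk DW stores */
        memcpy (p, &pat, 8);
      while (n--)                                          /* 0-7 byte tail */
        *p++ = (unsigned char) c;
      return s;
    }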
+
+#ifndef MEMSET
+# define MEMSET memset
+#endif
+ .machine power6
+EALIGN (MEMSET, 7, 0)
+ CALL_MCOUNT 3
+
+#define rTMP r0
+#define rRTN r3 /* Initial value of 1st argument. */
+#define rMEMP0 r3 /* Original value of 1st arg. */
+#define rCHR r4 /* Char to set in each byte. */
+#define rLEN r5 /* Length of region to set. */
+#define rMEMP r6 /* Address at which we are storing. */
+#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */
+#define rMEMP2 r8
+#define rMEMP3 r9 /* Alt mem pointer. */
+L(_memset):
+/* Take care of case for size <= 4. */
+ cmpldi cr1, rLEN, 8
+ andi. rALIGN, rMEMP0, 7
+ mr rMEMP, rMEMP0
+ ble cr1, L(small)
+
+/* Align to doubleword boundary. */
+ cmpldi cr5, rLEN, 31
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
+ beq+ L(aligned2)
+ mtcrf 0x01, rMEMP0
+ subfic rALIGN, rALIGN, 8
+ cror 28,30,31 /* Detect odd word aligned. */
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
+ bt 29, L(g4)
+/* Process the even word of doubleword. */
+ bf+ 31, L(g2)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(g4x)
+L(g2):
+ sth rCHR, -6(rMEMP)
+L(g4x):
+ stw rCHR, -4(rMEMP)
+ b L(aligned)
+/* Process the odd word of doubleword. */
+L(g4):
+ bf 28, L(g4x) /* If false, word aligned on odd word. */
+ bf+ 31, L(g0)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(aligned)
+L(g0):
+ sth rCHR, -2(rMEMP)
+
+/* Handle the case of size < 31. */
+L(aligned2):
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
+L(aligned):
+ mtcrf 0x01, rLEN
+ ble cr5, L(medium)
+/* Align to 32-byte boundary. */
+ andi. rALIGN, rMEMP, 0x18
+ subfic rALIGN, rALIGN, 0x20
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
+ beq L(caligned)
+ mtcrf 0x01, rALIGN
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ cmplwi cr1, rALIGN, 0x10
+ mr rMEMP2, rMEMP
+ bf 28, L(a1)
+ stdu rCHR, -8(rMEMP2)
+L(a1): blt cr1, L(a2)
+ std rCHR, -8(rMEMP2)
+ stdu rCHR, -16(rMEMP2)
+L(a2):
+
+/* Now aligned to a 32 byte boundary. */
+ .align 4
+L(caligned):
+ cmpldi cr1, rCHR, 0
+ clrrdi. rALIGN, rLEN, 5
+ mtcrf 0x01, rLEN
+ beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */
+ beq L(medium) /* We may not actually get to do a full line. */
+ .align 4
+/* Storing a non-zero "c" value. We are aligned at a sector (32-byte)
+ boundary but may not be at a cache line (128-byte) boundary. */
+L(nzloopstart):
+/* memset in 32-byte chunks until we get to a cache line boundary.
+ If rLEN is less than the distance to the next cache-line boundary use
+ cacheAligned1 code to finish the tail. */
+ cmpldi cr1,rLEN,128
+
+ andi. rTMP,rMEMP,127
+ blt cr1,L(cacheAligned1)
+ addi rMEMP3,rMEMP,32
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP)
+ std rCHR,8(rMEMP)
+ std rCHR,16(rMEMP)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP3,127
+ std rCHR,-8(rMEMP3)
+
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP3)
+ addi rMEMP,rMEMP,32
+ std rCHR,8(rMEMP3)
+ andi. rTMP,rMEMP,127
+ std rCHR,16(rMEMP3)
+ std rCHR,24(rMEMP3)
+
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,32(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmpldi cr1,rLEN,128
+ std rCHR,40(rMEMP3)
+ cmpldi cr6,rLEN,256
+ li rMEMP2,128
+ std rCHR,48(rMEMP3)
+ std rCHR,56(rMEMP3)
+ blt cr1,L(cacheAligned1)
+ b L(nzCacheAligned128)
+
+/* Now we are aligned to the cache line and can use dcbtst. */
+ .align 4
+L(nzCacheAligned):
+ cmpldi cr1,rLEN,128
+ blt cr1,L(cacheAligned1)
+ b L(nzCacheAligned128)
+ .align 5
+L(nzCacheAligned128):
+ cmpldi cr1,rLEN,256
+ addi rMEMP3,rMEMP,64
+ std rCHR,0(rMEMP)
+ std rCHR,8(rMEMP)
+ std rCHR,16(rMEMP)
+ std rCHR,24(rMEMP)
+ std rCHR,32(rMEMP)
+ std rCHR,40(rMEMP)
+ std rCHR,48(rMEMP)
+ std rCHR,56(rMEMP)
+ addi rMEMP,rMEMP3,64
+ addi rLEN,rLEN,-128
+ std rCHR,0(rMEMP3)
+ std rCHR,8(rMEMP3)
+ std rCHR,16(rMEMP3)
+ std rCHR,24(rMEMP3)
+ std rCHR,32(rMEMP3)
+ std rCHR,40(rMEMP3)
+ std rCHR,48(rMEMP3)
+ std rCHR,56(rMEMP3)
+ bge cr1,L(nzCacheAligned128)
+ dcbtst 0,rMEMP
+ b L(cacheAligned1)
+ .align 5
+/* Storing a zero "c" value. We are aligned at a sector (32-byte)
+ boundary but may not be at a cache line (128-byte) boundary. If the
+ remaining length spans a full cache line we can use the Data cache
+ block zero instruction. */
+L(zloopstart):
+/* memset in 32-byte chunks until we get to a cache line boundary.
+ If rLEN is less than the distance to the next cache-line boundary use
+ cacheAligned1 code to finish the tail. */
+ cmpldi cr1,rLEN,128
+ beq L(medium)
+L(getCacheAligned):
+ andi. rTMP,rMEMP,127
+ nop
+ blt cr1,L(cacheAligned1)
+ addi rMEMP3,rMEMP,32
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP)
+ std rCHR,8(rMEMP)
+ std rCHR,16(rMEMP)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP3,127
+ std rCHR,-8(rMEMP3)
+L(getCacheAligned2):
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP3)
+ std rCHR,8(rMEMP3)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP,127
+ std rCHR,16(rMEMP3)
+ std rCHR,24(rMEMP3)
+L(getCacheAligned3):
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,32(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmpldi cr1,rLEN,128
+ std rCHR,40(rMEMP3)
+ cmpldi cr6,rLEN,256
+ li rMEMP2,128
+ std rCHR,48(rMEMP3)
+ std rCHR,56(rMEMP3)
+ blt cr1,L(cacheAligned1)
+ blt cr6,L(cacheAligned128)
+ b L(cacheAlignedx)
+
+/* Now we are aligned to the cache line and can use dcbz. */
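The zero path hinges on dcbz, which clears a whole cache line without reading
it first. A PowerPC-only sketch, assuming the POWER6 128-byte line size
(gcc-style inline asm; zero_lines_sketch is a hypothetical helper):

    #include <stddef.h>

    static unsigned char *
    zero_lines_sketch (unsigned char *p, size_t *n)
    {
      /* P must already be 128-byte aligned, as L(zloopstart) arranges.  */
      while (*n >= 128)
        {
          __asm__ volatile ("dcbz 0,%0" : : "r" (p) : "memory");
          p += 128;
          *n -= 128;
        }
      return p;   /* caller stores the remaining sub-line tail */
    }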
+ .align 5
+L(cacheAligned):
+ cmpldi cr1,rLEN,128
+ cmpldi cr6,rLEN,256
+ blt cr1,L(cacheAligned1)
+ li rMEMP2,128
+L(cacheAlignedx):
+ cmpldi cr5,rLEN,640
+ blt cr6,L(cacheAligned128)
+ bgt cr5,L(cacheAligned512)
+ cmpldi cr6,rLEN,512
+ dcbz 0,rMEMP
+ cmpldi cr1,rLEN,384
+ dcbz rMEMP2,rMEMP
+ addi rMEMP,rMEMP,256
+ addi rLEN,rLEN,-256
+ blt cr1,L(cacheAligned1)
+ blt cr6,L(cacheAligned128)
+ b L(cacheAligned256)
+ .align 5
+/* A simple loop for the longer (>640 bytes) lengths. This form limits
+ branch mispredictions to exactly one, at loop exit. */
+L(cacheAligned512):
+ cmpldi cr1,rLEN,128
+ blt cr1,L(cacheAligned1)
+ dcbz 0,rMEMP
+ addi rLEN,rLEN,-128
+ addi rMEMP,rMEMP,128
+ b L(cacheAligned512)
+ .align 5
+L(cacheAligned256):
+
+ cmpldi cr6,rLEN,512
+
+ dcbz 0,rMEMP
+ cmpldi cr1,rLEN,384
+ dcbz rMEMP2,rMEMP
+ addi rMEMP,rMEMP,256
+ addi rLEN,rLEN,-256
+
+ bge cr6,L(cacheAligned256)
+
+ blt cr1,L(cacheAligned1)
+ .align 4
+L(cacheAligned128):
+ dcbz 0,rMEMP
+ addi rMEMP,rMEMP,128
+ addi rLEN,rLEN,-128
+ nop
+L(cacheAligned1):
+ cmpldi cr1,rLEN,32
+ blt cr1,L(handletail32)
+ addi rMEMP3,rMEMP,32
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP)
+ std rCHR,8(rMEMP)
+ std rCHR,16(rMEMP)
+ addi rMEMP,rMEMP,32
+ cmpldi cr1,rLEN,32
+ std rCHR,-8(rMEMP3)
+L(cacheAligned2):
+ blt cr1,L(handletail32)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP3)
+ std rCHR,8(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmpldi cr1,rLEN,32
+ std rCHR,16(rMEMP3)
+ std rCHR,24(rMEMP3)
+ nop
+L(cacheAligned3):
+ blt cr1,L(handletail32)
+ addi rMEMP,rMEMP,32
+ addi rLEN,rLEN,-32
+ std rCHR,32(rMEMP3)
+ std rCHR,40(rMEMP3)
+ std rCHR,48(rMEMP3)
+ std rCHR,56(rMEMP3)
+
+/* We are here because the length or remainder (rLEN) is less than the
+ cache line/sector size and does not justify aggressive loop unrolling.
+ So set up the preconditions for L(medium) and go there. */
+ .align 3
+L(handletail32):
+ cmpldi cr1,rLEN,0
+ beqlr cr1
+ b L(medium)
+
+ .align 5
+L(small):
+/* Memset of 8 bytes or less. */
+ cmpldi cr6, rLEN, 4
+ cmpldi cr5, rLEN, 1
+ ble cr6,L(le4)
+ subi rLEN, rLEN, 4
+ stb rCHR,0(rMEMP)
+ stb rCHR,1(rMEMP)
+ stb rCHR,2(rMEMP)
+ stb rCHR,3(rMEMP)
+ addi rMEMP,rMEMP, 4
+ cmpldi cr5, rLEN, 1
+L(le4):
+ cmpldi cr1, rLEN, 3
+ bltlr cr5
+ stb rCHR, 0(rMEMP)
+ beqlr cr5
+ stb rCHR, 1(rMEMP)
+ bltlr cr1
+ stb rCHR, 2(rMEMP)
+ beqlr cr1
+ stb rCHR, 3(rMEMP)
+ blr
+
+/* Memset of 0-31 bytes. */
+ .align 5
+L(medium):
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
+ cmpldi cr1, rLEN, 16
+L(medium_tail2):
+ add rMEMP, rMEMP, rLEN
+L(medium_tail):
+ bt- 31, L(medium_31t)
+ bt- 30, L(medium_30t)
+L(medium_30f):
+ bt 29, L(medium_29t)
+L(medium_29f):
+ bge cr1, L(medium_27t)
+ bflr 28
+ std rCHR, -8(rMEMP)
+ blr
+
+L(medium_31t):
+ stbu rCHR, -1(rMEMP)
+ bf- 30, L(medium_30f)
+L(medium_30t):
+ sthu rCHR, -2(rMEMP)
+ bf- 29, L(medium_29f)
+L(medium_29t):
+ stwu rCHR, -4(rMEMP)
+ blt cr1, L(medium_27f)
+L(medium_27t):
+ std rCHR, -8(rMEMP)
+ stdu rCHR, -16(rMEMP)
+L(medium_27f):
+ bflr 28
+L(medium_28t):
+ std rCHR, -8(rMEMP)
+ blr
+END_GEN_TB (MEMSET,TB_TOCLESS)
+libc_hidden_builtin_def (memset)
+
+/* Copied from bzero.S to prevent the linker from inserting a stub
+ between bzero and memset.
*/ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END (__bzero) +#ifndef __bzero +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/multiarch/Implies new file mode 100644 index 0000000000..2ebe304fa6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5+/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcschr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcschr.c new file mode 100644 index 0000000000..ae04a130cc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcschr.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power6/wcschr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcscpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcscpy.c new file mode 100644 index 0000000000..722c8f995b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcscpy.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power6/wcscpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcsrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcsrchr.c new file mode 100644 index 0000000000..b86472d7bd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcsrchr.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power6/wcsrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/Implies new file mode 100644 index 0000000000..9d68f39d22 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power6/fpu +powerpc/powerpc64/power6 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/Implies new file mode 100644 index 0000000000..30fa17646e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies new file mode 100644 index 0000000000..410d289a6d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S new file mode 100644 index 0000000000..b6e11ba0c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S @@ -0,0 +1,58 @@ +/* isnan(). PowerPC64 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power6 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + mftgpr r4,fp1 /* copy FPR to GPR */ + lis r0,0x7ff0 + ori r1,r1,0 + clrldi r4,r4,1 /* x = fabs(x) */ + sldi r0,r0,32 /* const long r0 0x7ff00000 00000000 */ + cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */ + li r3,0 /* then return 0 */ + blelr+ cr7 + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S new file mode 100644 index 0000000000..37aa69061c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S @@ -0,0 +1,44 @@ +/* Round double to long int. POWER6x PowerPC64 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power6" +/* long long int[r3] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT 0 + fctid fp13,fp1 + mftgpr r3,fp13 + blr + END (__llrint) + +strong_alias (__llrint, __lrint) +weak_alias (__llrint, llrint) +weak_alias (__lrint, lrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S new file mode 100644 index 0000000000..62e1798785 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S @@ -0,0 +1,54 @@ +/* llround function. POWER6x PowerPC64 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long [r3] llround (double x [fp1])
+   IEEE 1003.1 llround function.  IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode."  However the PowerPC Architecture defines
+   "Round to Nearest" as "Choose the best approximation.  In case of a
+   tie, choose the one that is even (least significant bit 0)."
+   So we pre-round using the V2.02 Floating Round to Integer Nearest
+   instruction before we use the Floating Convert To Integer Doubleword
+   with round toward Zero instruction.  */
+
+	.machine	"power6"
+ENTRY (__llround)
+	CALL_MCOUNT 0
+	frin	fp2,fp1	/* Round to nearest +-0.5.  */
+	fctidz	fp3,fp2	/* Convert To Integer DW round toward 0.  */
+	mftgpr	r3,fp3	/* Transfer integer to R3.  */
+	blr
+	END (__llround)
+
+strong_alias (__llround, __lround)
+weak_alias (__llround, llround)
+weak_alias (__lround, lround)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/multiarch/Implies
new file mode 100644
index 0000000000..bf5d6171a5
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power6/multiarch
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Implies
new file mode 100644
index 0000000000..9d68f39d22
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc64/power6/fpu
+powerpc/powerpc64/power6
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Makefile
new file mode 100644
index 0000000000..89a2296085
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Makefile
@@ -0,0 +1,11 @@
+ifeq ($(subdir),elf)
+# Prevent the use of VSX registers and insns in _dl_start, which under -O3
+# optimization may require a TOC reference before relocations are resolved.
+CFLAGS-rtld.c += -mno-vsx
+endif
+
+ifeq ($(subdir),string)
+sysdep_routines += strstr-ppc64
+CFLAGS-strncase.c += -funroll-loops
+CFLAGS-strncase_l.c += -funroll-loops
+endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/add_n.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/add_n.S
new file mode 100644
index 0000000000..6425afbc9f
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/add_n.S
@@ -0,0 +1,98 @@
+/* PowerPC64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+   Copyright (C) 2003-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* cycles/limb
+ * POWER7             2.18
+ */
+
+#ifdef USE_AS_SUB
+# define FUNC      __mpn_sub_n
+# define ADDSUBC   subfe
+#else
+# define FUNC      __mpn_add_n
+# define ADDSUBC   adde
+#endif
+
+#define RP  r3
+#define UP  r4
+#define VP  r5
+#define N   r6
+
+EALIGN(FUNC, 5, 0)
+#ifdef USE_AS_SUB
+	addic	r0, r1, -1	/* Set CA so the first subfe sees no borrow.  */
+#else
+	addic	r0, r0, 0	/* Clear CA so the first adde sees no carry.  */
+#endif
+	andi.	r7, N, 1
+	beq	L(bx0)
+
+	ld	r7, 0(UP)
+	ld	r9, 0(VP)
+	ADDSUBC	r11, r9, r7
+	std	r11, 0(RP)
+	cmpldi	N, 1
+	beq	L(end)
+	addi	UP, UP, 8
+	addi	VP, VP, 8
+	addi	RP, RP, 8
+
+L(bx0):	addi	r0, N, 2
+	srdi	r0, r0, 2
+	mtctr	r0
+
+	andi.	r7, N, 2
+	bne	L(mid)
+
+	addi	UP, UP, 16
+	addi	VP, VP, 16
+	addi	RP, RP, 16
+
+	.align	5
+L(top):	ld	r6, -16(UP)
+	ld	r7, -8(UP)
+	ld	r8, -16(VP)
+	ld	r9, -8(VP)
+	ADDSUBC	r10, r8, r6
+	ADDSUBC	r11, r9, r7
+	std	r10, -16(RP)
+	std	r11, -8(RP)
+L(mid):	ld	r6, 0(UP)
+	ld	r7, 8(UP)
+	ld	r8, 0(VP)
+	ld	r9, 8(VP)
+	ADDSUBC	r10, r8, r6
+	ADDSUBC	r11, r9, r7
+	std	r10, 0(RP)
+	std	r11, 8(RP)
+	addi	UP, UP, 32
+	addi	VP, VP, 32
+	addi	RP, RP, 32
+	bdnz	L(top)
+
+L(end):	subfe	r3, r0, r0
+#ifdef USE_AS_SUB
+	neg	r3, r3
+#else
+	addi	r3, r3, 1
+#endif
+	blr
+END(FUNC)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/bcopy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/bcopy.c
new file mode 100644
index 0000000000..4a6a400e7a
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/bcopy.c
@@ -0,0 +1 @@
+/* Implemented in memmove.S.  */
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/Implies
new file mode 100644
index 0000000000..30fa17646e
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power6/fpu
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies
new file mode 100644
index 0000000000..410d289a6d
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power6/fpu/multiarch
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
new file mode 100644
index 0000000000..9ccc758c9e
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
@@ -0,0 +1,70 @@
+/* finite().  PowerPC64/POWER7 version.
+   Copyright (C) 2010-2017 Free Software Foundation, Inc.
+   Contributed by Luis Machado <luisgpm@br.ibm.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __finite(x)  */
+	.section	".toc","aw"
+.LC0:	/* 1.0 */
+	.tc	FD_ONE[TC],0x3ff0000000000000
+	.section	".text"
+	.type	__finite, @function
+	.machine	power7
+EALIGN (__finite, 4, 0)
+	CALL_MCOUNT 0
+	lfd	fp0,.LC0@toc(r2)
+	ftdiv	cr7,fp1,fp0
+	li	r3,1
+	bflr	30
+
+	/* If we are here, we either have +/-INF,
+	   NaN or denormal.  */
+
+	stfd	fp1,-16(r1)	   /* Transfer FP to GPR's.  */
+	ori	2,2,0		   /* Force a new dispatch group.  */
+	lhz	r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+				      (biased exponent and sign bit).  */
+	clrlwi	r4,r4,17	   /* r4 = abs(r4).  */
+	cmpwi	cr7,r4,0x7ff0	   /* r4 == 0x7ff0?  */
+	bltlr	cr7		   /* LT means finite, otherwise non-finite.  */
+	li	r3,0
+	blr
+	END (__finite)
+
+hidden_def (__finite)
+weak_alias (__finite, finite)
+
+/* It turns out that the 'double' version will also always work for
+   single-precision.  */
+strong_alias (__finite, __finitef)
+hidden_def (__finitef)
+weak_alias (__finitef, finitef)
+
+#if IS_IN (libm)
+# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0)
+compat_symbol (libm, __finite, __finitel, GLIBC_2_0);
+compat_symbol (libm, finite, finitel, GLIBC_2_0);
+# endif
+#else
+# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0)
+compat_symbol (libc, __finite, __finitel, GLIBC_2_0);
+compat_symbol (libc, finite, finitel, GLIBC_2_0);
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S
new file mode 100644
index 0000000000..54bd94176d
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_finite.S.  */
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
new file mode 100644
index 0000000000..4482cddcfa
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
@@ -0,0 +1,69 @@
+/* isinf().  PowerPC64/POWER7 version.
+   Copyright (C) 2010-2017 Free Software Foundation, Inc.
+   Contributed by Luis Machado <luisgpm@br.ibm.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
*/
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isinf(x)  */
+	.section	".toc","aw"
+.LC0:	/* 1.0 */
+	.tc	FD_ONE[TC],0x3ff0000000000000
+	.section	".text"
+	.type	__isinf, @function
+	.machine	power7
+EALIGN (__isinf, 4, 0)
+	CALL_MCOUNT 0
+	lfd	fp0,.LC0@toc(r2)
+	ftdiv	cr7,fp1,fp0
+	li	r3,0
+	bflr	29		/* If not INF, return.  */
+
+	/* Either we have -INF/+INF or a denormal.  */
+
+	stfd	fp1,-16(r1)	   /* Transfer FP to GPR's.  */
+	ori	2,2,0		   /* Force a new dispatch group.  */
+	lhz	r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+				      (biased exponent and sign bit).  */
+	cmpwi	cr7,r4,0x7ff0	   /* r4 == 0x7ff0?  */
+	li	r3,1
+	beqlr	cr7		   /* EQ means INF, otherwise -INF.  */
+	li	r3,-1
+	blr
+	END (__isinf)
+
+hidden_def (__isinf)
+weak_alias (__isinf, isinf)
+
+/* It turns out that the 'double' version will also always work for
+   single-precision.  */
+strong_alias (__isinf, __isinff)
+hidden_def (__isinff)
+weak_alias (__isinff, isinff)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isinf, __isinfl)
+weak_alias (__isinf, isinfl)
+#endif
+
+#if !IS_IN (libm)
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0);
+compat_symbol (libc, isinf, isinfl, GLIBC_2_0);
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S
new file mode 100644
index 0000000000..be759e091e
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_isinf.S.  */
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S
new file mode 100644
index 0000000000..46b08a0d37
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S
@@ -0,0 +1,68 @@
+/* isnan().  PowerPC64/POWER7 version.
+   Copyright (C) 2010-2017 Free Software Foundation, Inc.
+   Contributed by Luis Machado <luisgpm@br.ibm.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isnan(x)  */
+	.section	".toc","aw"
+.LC0:	/* 1.0 */
+	.tc	FD_ONE[TC],0x3ff0000000000000
+	.section	".text"
+	.type	__isnan, @function
+	.machine	power7
+EALIGN (__isnan, 4, 0)
+	CALL_MCOUNT 0
+	lfd	fp0,.LC0@toc(r2)
+	ftdiv	cr7,fp1,fp0
+	li	r3,0
+	bflr	30		/* If not NaN, finish.  */
+
+	stfd	fp1,-16(r1)	/* Transfer FP to GPR's.  */
+	ori	2,2,0		/* Force a new dispatch group.  */
+	ld	r4,-16(r1)	/* Load FP into GPR.  */
+	lis	r0,0x7ff0
+	sldi	r0,r0,32	/* const long int 0x7ff00000 00000000.  */
+	clrldi	r4,r4,1		/* x = fabs(x)  */
+	cmpd	cr7,r4,r0	/* if (fabs(x) <= inf)  */
+	blelr	cr7		/* LE means not NaN.
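+				   Falling through therefore means the
+				   masked value is greater than the
+				   infinity bit pattern, i.e. a NaN.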
*/ + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S new file mode 100644 index 0000000000..b48c85e0d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isnan.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c new file mode 100644 index 0000000000..2599c771d9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power7/fpu/s_logb.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c new file mode 100644 index 0000000000..7a5a8032e0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power7/fpu/s_logbf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c new file mode 100644 index 0000000000..524ae2c78d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power7/fpu/s_logbl.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memchr.S new file mode 100644 index 0000000000..5e9707aa02 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memchr.S @@ -0,0 +1,199 @@ +/* Optimized memchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +/* int [r3] memchr (char *s [r3], int byte [r4], int size [r5]) */ + +#ifndef MEMCHR +# define MEMCHR __memchr +#endif + .machine power7 +ENTRY (MEMCHR) + CALL_MCOUNT 3 + dcbt 0,r3 + clrrdi r8,r3,3 + insrdi r4,r4,8,48 + + /* Calculate the last acceptable address and check for possible + addition overflow by using satured math: + r7 = r3 + r5 + r7 |= -(r7 < x) */ + add r7,r3,r5 + subfc r6,r3,r7 + subfe r9,r9,r9 + extsw r6,r9 + or r7,r7,r6 + + insrdi r4,r4,16,32 + cmpldi r5,32 + li r9, -1 + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + insrdi r4,r4,32,0 + addi r7,r7,-1 +#ifdef __LITTLE_ENDIAN__ + sld r9,r9,r6 +#else + srd r9,r9,r6 +#endif + ble L(small_range) + + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r3,r12,r4 /* Check for BYTEs in DWORD1. */ + and r3,r3,r9 + clrldi r5,r7,61 /* Byte count - 1 in last dword. */ + clrrdi r7,r7,3 /* Address of last doubleword. */ + cmpldi cr7,r3,0 /* Does r3 indicate we got a hit? */ + bne cr7,L(done) + + mtcrf 0x01,r8 + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + bt 28,L(loop_setup) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr7,r3,0 + bne cr7,L(done) + +L(loop_setup): + /* The last dword we want to read in the loop below is the one + containing the last byte of the string, ie. the dword at + (s + size - 1) & ~7, or r7. The first dword read is at + r8 + 8, we read 2 * cnt dwords, so the last dword read will + be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives + cnt = (r7 - r8) / 16 */ + sub r6,r7,r8 + srdi r6,r6,4 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. */ + + /* Main loop to look for BYTE in the string. Since + it's a small loop (8 instructions), align it to 32-bytes. */ + .align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r3,r12,r4 + cmpb r9,r11,r4 + or r6,r9,r3 /* Merge everything in one doubleword. */ + cmpldi cr7,r6,0 + bne cr7,L(found) + bdnz L(loop) + + /* We may have one more dword to read. */ + cmpld r8,r7 + beqlr + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + bne cr6,L(done) + blr + + .align 4 +L(found): + /* OK, one (or both) of the doublewords contains BYTE. Check + the first doubleword and decrement the address in case the first + doubleword really contains BYTE. */ + cmpldi cr6,r3,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* BYTE must be in the second doubleword. Adjust the address + again and move the result of cmpb to r3 so we can calculate the + pointer. */ + + mr r3,r9 + addi r8,r8,8 + + /* r3 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as BYTE in the original + doubleword from the string. Use that to calculate the pointer. + We need to make sure BYTE is *before* the end of the range. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r3,-1 + andc r0,r0,r3 + popcntd r0,r0 /* Count trailing zeros. */ +#else + cntlzd r0,r3 /* Count leading zeros before the match. */ +#endif + cmpld r8,r7 /* Are we on the last dword? */ + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r8,r0 + cmpld cr7,r0,r5 /* If on the last dword, check byte offset. */ + bnelr + blelr cr7 + li r3,0 + blr + + .align 4 +L(null): + li r3,0 + blr + +/* Deals with size <= 32. 
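+   In plain C, this tail path behaves roughly like the following
+   sketch (illustrative only -- the assembly below still scans
+   doubleword by doubleword with cmpb rather than byte by byte; the
+   names s, byte and size come from the prototype comment above):
+
+     for (size_t i = 0; i < size; i++)
+       if (((const unsigned char *) s)[i] == (unsigned char) byte)
+         return (char *) s + i;
+     return NULL;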
*/ + .align 4 +L(small_range): + cmpldi r5,0 + beq L(null) + ld r12,0(r8) /* Load word from memory. */ + cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ + and r3,r3,r9 + cmpldi cr7,r3,0 + clrldi r5,r7,61 /* Byte count - 1 in last dword. */ + clrrdi r7,r7,3 /* Address of last doubleword. */ + cmpld r8,r7 /* Are we done already? */ + bne cr7,L(done) + beqlr + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + cmpld r8,r7 + bne cr6,L(done) /* Found something. */ + beqlr /* Hit end of string (length). */ + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + cmpld r8,r7 + bne cr6,L(done) + beqlr + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + cmpld r8,r7 + bne cr6,L(done) + beqlr + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + bne cr6,L(done) + blr + +END (MEMCHR) +weak_alias (__memchr, memchr) +libc_hidden_builtin_def (memchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcmp.S new file mode 100644 index 0000000000..96ce8cee25 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcmp.S @@ -0,0 +1,1061 @@ +/* Optimized memcmp implementation for POWER7/PowerPC64. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ +#ifndef MEMCMP +# define MEMCMP memcmp +#endif + .machine power7 +EALIGN (MEMCMP, 4, 0) + CALL_MCOUNT 3 + +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r8 /* next word in s1 */ +#define rWORD4 r9 /* next word in s2 */ +#define rWORD5 r10 /* next word in s1 */ +#define rWORD6 r11 /* next word in s2 */ + +#define rOFF8 r20 /* 8 bytes offset. */ +#define rOFF16 r21 /* 16 bytes offset. */ +#define rOFF24 r22 /* 24 bytes offset. */ +#define rOFF32 r23 /* 24 bytes offset. */ +#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */ +#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ +#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ +#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ +#define rSHR r28 /* Unaligned shift right count. */ +#define rSHL r29 /* Unaligned shift left count. 
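+				   Both shift counts are expressed in
+				   bits: the byte offset within a
+				   doubleword times 8.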
*/ +#define rWORD7 r30 /* next word in s1 */ +#define rWORD8 r31 /* next word in s2 */ + +#define rWORD8SAVE (-8) +#define rWORD7SAVE (-16) +#define rOFF8SAVE (-24) +#define rOFF16SAVE (-32) +#define rOFF24SAVE (-40) +#define rOFF32SAVE (-48) +#define rSHRSAVE (-56) +#define rSHLSAVE (-64) +#define rWORD8SHIFTSAVE (-72) +#define rWORD2SHIFTSAVE (-80) +#define rWORD4SHIFTSAVE (-88) +#define rWORD6SHIFTSAVE (-96) + +#ifdef __LITTLE_ENDIAN__ +# define LD ldbrx +#else +# define LD ldx +#endif + + xor r0, rSTR2, rSTR1 + cmpldi cr6, rN, 0 + cmpldi cr1, rN, 12 + clrldi. r0, r0, 61 + clrldi r12, rSTR1, 61 + cmpldi cr5, r12, 0 + beq- cr6, L(zeroLength) + dcbt 0, rSTR1 + dcbt 0, rSTR2 +/* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) + std rWORD8, rWORD8SAVE(r1) + std rWORD7, rWORD7SAVE(r1) + std rOFF8, rOFF8SAVE(r1) + std rOFF16, rOFF16SAVE(r1) + std rOFF24, rOFF24SAVE(r1) + std rOFF32, rOFF32SAVE(r1) + cfi_offset(rWORD8, rWORD8SAVE) + cfi_offset(rWORD7, rWORD7SAVE) + cfi_offset(rOFF8, rOFF8SAVE) + cfi_offset(rOFF16, rOFF16SAVE) + cfi_offset(rOFF24, rOFF24SAVE) + cfi_offset(rOFF32, rOFF32SAVE) + + li rOFF8,8 + li rOFF16,16 + li rOFF24,24 + li rOFF32,32 + + bne L(unaligned) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then we are already double word + aligned and can perform the DW aligned loop. + + Otherwise we know the two strings have the same alignment (but not + yet DW). So we force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DW aligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ + .align 4 +L(samealignment): + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + beq cr5, L(DWaligned) + add rN, rN, r12 + sldi rWORD6, r12, 3 + srdi r0, rN, 5 /* Divide by 32 */ + andi. r12, rN, 24 /* Get the DW remainder */ + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dPs4) + mtctr r0 + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + +/* Remainder is 8 */ + .align 3 +L(dsP1): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 + b L(dP1e) +/* Remainder is 16 */ + .align 4 +L(dPs2): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. */ + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 + b L(dP2e) +/* Remainder is 24 */ + .align 4 +L(dPs3): + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD2, rWORD6 + cmpld cr1, rWORD3, rWORD4 + b L(dP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dPs4): + mtctr r0 + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD2, rWORD6 + cmpld cr7, rWORD1, rWORD2 + b L(dP4e) + +/* At this point we know both strings are double word aligned and the + compare length is at least 8 bytes. 
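+
+   In effect, each doubleword step of the loops below behaves like this
+   C sketch (illustrative only; load8 is a stand-in for the LD macro,
+   which byte-reverses on little-endian so that a plain unsigned
+   comparison yields memcmp ordering):
+
+     uint64_t w1 = load8 (rSTR1), w2 = load8 (rSTR2);
+     if (w1 != w2)
+       return w1 > w2 ? 1 : -1;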
*/ + .align 4 +L(DWaligned): + andi. r12, rN, 24 /* Get the DW remainder */ + srdi r0, rN, 5 /* Divide by 32 */ + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) + +/* Remainder is 8 */ + .align 4 +L(dP1): + mtctr r0 +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ + LD rWORD5, 0, rSTR1 + LD rWORD6, 0, rSTR2 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 +L(dP1e): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr1, rWORD3, rWORD4 + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5x) + bne cr7, L(dLcr7x) + + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + bne cr1, L(dLcr1) + cmpld cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + .align 3 +L(dP1x): + sldi. r12, rN, 3 + bne cr5, L(dLcr5x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Remainder is 16 */ + .align 4 +L(dP2): + mtctr r0 + LD rWORD5, 0, rSTR1 + LD rWORD6, 0, rSTR2 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 +L(dP2e): + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + LD rWORD3, rOFF24, rSTR1 + LD rWORD4, rOFF24, rSTR2 + cmpld cr1, rWORD3, rWORD4 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) + .align 4 +L(dP2x): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + sldi. r12, rN, 3 + bne cr6, L(dLcr6x) + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr1, L(dLcr1x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Remainder is 24 */ + .align 4 +L(dP3): + mtctr r0 + LD rWORD3, 0, rSTR1 + LD rWORD4, 0, rSTR2 + cmpld cr1, rWORD3, rWORD4 +L(dP3e): + LD rWORD5, rOFF8, rSTR1 + LD rWORD6, rOFF8, rSTR2 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) + LD rWORD7, rOFF16, rSTR1 + LD rWORD8, rOFF16, rSTR2 + cmpld cr5, rWORD7, rWORD8 + LD rWORD1, rOFF24, rSTR1 + LD rWORD2, rOFF24, rSTR2 + cmpld cr7, rWORD1, rWORD2 + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) +/* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP3x): + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + sldi. r12, rN, 3 + bne cr1, L(dLcr1x) + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + bne cr6, L(dLcr6x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). 
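+				   For example, a 3-byte remainder gives
+				   r12 = 24 and a count of 40, so the srd
+				   in L(d00) keeps only the first three
+				   bytes of each final doubleword.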
*/ + bne cr7, L(dLcr7x) + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dP4): + mtctr r0 + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpld cr7, rWORD1, rWORD2 +L(dP4e): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + LD rWORD5, rOFF16, rSTR1 + LD rWORD6, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + LD rWORD7, rOFF24, rSTR1 + LD rWORD8, rOFF24, rSTR2 + addi rSTR1, rSTR1, 24 + addi rSTR2, rSTR2, 24 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(dLoop): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +L(dLoop1): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) +L(dLoop2): + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) +L(dLoop3): + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + bne cr1, L(dLcr1) + cmpld cr7, rWORD1, rWORD2 + bdnz L(dLoop) + +L(dL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + cmpld cr5, rWORD7, rWORD8 +L(d44): + bne cr7, L(dLcr7) +L(d34): + bne cr1, L(dLcr1) +L(d24): + bne cr6, L(dLcr6) +L(d14): + sldi. r12, rN, 3 + bne cr5, L(dLcr5) +L(d04): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + beq L(duzeroLength) +/* At this point we have a remainder of 1 to 7 bytes to compare. Since + we are aligned it is safe to load the whole double word, and use + shift right double to eliminate bits beyond the compare length. */ +L(d00): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + cmpld cr7, rWORD1, rWORD2 + bne cr7, L(dLcr7x) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + + .align 4 +L(dLcr7): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr7x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(dLcr1): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr1x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr + .align 4 +L(dLcr6): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr6x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + .align 4 +L(dLcr5): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr5x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr5 + li rRTN, -1 + blr + + .align 4 +L(bytealigned): + mtctr rN + +/* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) is 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. 
Effectively + the first iteration of the loop only serves to load operands and + branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. */ + + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz L(b11) + cmpld cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz L(b12) + cmpld cr1, rWORD3, rWORD4 + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz L(b13) + .align 4 +L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + bne cr7, L(bLcr7) + + cmpld cr6, rWORD5, rWORD6 + bdz L(b3i) + + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + bne cr1, L(bLcr1) + + cmpld cr7, rWORD1, rWORD2 + bdz L(b2i) + + lbzu rWORD5, 1(rSTR1) + lbzu rWORD6, 1(rSTR2) + bne cr6, L(bLcr6) + + cmpld cr1, rWORD3, rWORD4 + bdnz L(bLoop) + +/* We speculatively loading bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. In this + case the loop will exit early (before the all pending bytes are + tested. In this case we must complete the pending operations + before returning. */ +L(b1i): + bne cr7, L(bLcr7) + bne cr1, L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne cr6, L(bLcr6) + bne cr7, L(bLcr7) + b L(bx34) + .align 4 +L(b3i): + bne cr1, L(bLcr1) + bne cr6, L(bLcr6) + b L(bx12) + .align 4 +L(bLcr7): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr +L(bLcr1): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr +L(bLcr6): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + +L(b13): + bne cr7, L(bx12) + bne cr1, L(bx34) +L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop +L(b12): + bne cr7, L(bx12) +L(bx34): + sub rRTN, rWORD3, rWORD4 + blr +L(b11): +L(bx12): + sub rRTN, rWORD1, rWORD2 + blr + + .align 4 +L(zeroLength): + li rRTN, 0 + blr + + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then rStr1 is double word + aligned and can perform the DWunaligned loop. + + Otherwise we know that rSTR1 is not already DW aligned yet. + So we can force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DWaligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected resister pair. */ +L(unaligned): + std rSHL, rSHLSAVE(r1) + cfi_offset(rSHL, rSHLSAVE) + clrldi rSHL, rSTR2, 61 + beq cr6, L(duzeroLength) + std rSHR, rSHRSAVE(r1) + cfi_offset(rSHR, rSHRSAVE) + beq cr5, L(DWunaligned) + std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 DW. */ + sub rWORD8_SHIFT, rSTR2, r12 +/* But do not attempt to address the DW before that DW that contains + the actual start of rSTR2. */ + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (DW aligned) start of rSTR1. 
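+   For instance, with a byte offset of 3, rSHL = 3 * 8 = 24 and
+   rSHR = 64 - 24 = 40; each logical doubleword of rSTR2 is then
+   rebuilt as (dw0 << 24) | (dw1 >> 40), i.e. the last five bytes of
+   one aligned doubleword followed by the first three bytes of the
+   next (big-endian view).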
*/ + clrldi rSHL, rWORD8_SHIFT, 61 + clrrdi rSTR1, rSTR1, 3 + std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + sldi rSHL, rSHL, 3 + cmpld cr5, rWORD8_SHIFT, rSTR2 + add rN, rN, r12 + sldi rWORD6, r12, 3 + std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE) + cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE) + cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE) + subfic rSHR, rSHL, 64 + srdi r0, rN, 5 /* Divide by 32 */ + andi. r12, rN, 24 /* Get the DW remainder */ +/* We normally need to load 2 DWs to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a DW where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. */ + li rWORD8, 0 + blt cr5, L(dus0) + LD rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 8 + sld rWORD8, rWORD8, rSHL + +L(dus0): + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + srd r12, rWORD2, rSHR + clrldi rN, rN, 61 + beq L(duPs4) + mtctr r0 + or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + +/* Remainder is 8 */ + .align 4 +L(dusP1): + sld rWORD8_SHIFT, rWORD2, rSHL + sld rWORD7, rWORD1, rWORD6 + sld rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duPs2): + sld rWORD6_SHIFT, rWORD2, rSHL + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD8, rWORD6 + b L(duP2e) +/* Remainder is 24 */ + .align 4 +L(duPs3): + sld rWORD4_SHIFT, rWORD2, rSHL + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD8, rWORD6 + b L(duP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duPs4): + mtctr r0 + or rWORD8, r12, rWORD8 + sld rWORD2_SHIFT, rWORD2, rSHL + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD8, rWORD6 + b L(duP4e) + +/* At this point we know rSTR1 is double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWunaligned): + std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + srdi r0, rN, 5 /* Divide by 32 */ + std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + andi. 
r12, rN, 24 /* Get the DW remainder */ + std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE) + cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE) + cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE) + cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE) + sldi rSHL, rSHL, 3 + LD rWORD6, 0, rSTR2 + LD rWORD8, rOFF8, rSTR2 + addi rSTR2, rSTR2, 8 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + subfic rSHR, rSHL, 64 + sld rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) + mtctr r0 + bgt cr1, L(duP3) + beq cr1, L(duP2) + +/* Remainder is 8 */ + .align 4 +L(duP1): + srd r12, rWORD8, rSHR + LD rWORD7, 0, rSTR1 + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) +L(duP1e): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) + or rWORD4, r12, rWORD2_SHIFT + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + bne cr7, L(duLcr7) + or rWORD6, r0, rWORD4_SHIFT + cmpld cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duP2): + srd r0, rWORD8, rSHR + LD rWORD5, 0, rSTR1 + or rWORD6, r0, rWORD6_SHIFT + sld rWORD6_SHIFT, rWORD8, rSHL +L(duP2e): + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr6, rWORD5, rWORD6 + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + LD rWORD3, rOFF24, rSTR1 + LD rWORD4, rOFF24, rSTR2 + cmpld cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + cmpld cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmpld cr5, rWORD7, rWORD8 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(duLcr6) + sldi. 
rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Remainder is 24 */ + .align 4 +L(duP3): + srd r12, rWORD8, rSHR + LD rWORD3, 0, rSTR1 + sld rWORD4_SHIFT, rWORD8, rSHL + or rWORD4, r12, rWORD6_SHIFT +L(duP3e): + LD rWORD5, rOFF8, rSTR1 + LD rWORD6, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT + LD rWORD7, rOFF16, rSTR1 + LD rWORD8, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) + LD rWORD1, rOFF24, rSTR1 + LD rWORD2, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + cmpld cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duP4): + mtctr r0 + srd r0, rWORD8, rSHR + LD rWORD1, 0, rSTR1 + sld rWORD2_SHIFT, rWORD8, rSHL + or rWORD2, r0, rWORD6_SHIFT +L(duP4e): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT + LD rWORD5, rOFF16, rSTR1 + LD rWORD6, rOFF16, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT + LD rWORD7, rOFF24, rSTR1 + LD rWORD8, rOFF24, rSTR2 + addi rSTR1, rSTR1, 24 + addi rSTR2, rSTR2, 24 + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + cmpld cr5, rWORD7, rWORD8 + bdz L(du24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(duLoop): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +L(duLoop1): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +L(duLoop2): + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +L(duLoop3): + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + cmpld cr7, rWORD1, rWORD2 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + bdnz L(duLoop) + +L(duL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmpld cr5, rWORD7, rWORD8 +L(du44): + bne cr7, L(duLcr7) +L(du34): + bne cr1, L(duLcr1) +L(du24): + bne cr6, L(duLcr6) +L(du14): + sldi. rN, rN, 3 + bne cr5, L(duLcr5) +/* At this point we have a remainder of 1 to 7 bytes to compare. We use + shift right double to eliminate bits beyond the compare length. 
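+   (With a 2-byte remainder, for instance, the count computed below is
+   64 - 16 = 48, so only the first two bytes of each doubleword take
+   part in the final comparison.)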
+ + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rWORD8_SHIFT). */ + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + .align 4 +L(dutrim): + LD rWORD1, rOFF8, rSTR1 + ld rWORD8, -8(r1) + subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */ + or rWORD2, r0, rWORD8_SHIFT + ld rWORD7, rWORD7SAVE(r1) + ld rSHL, rSHLSAVE(r1) + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + ld rSHR, rSHRSAVE(r1) + ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + li rRTN, 0 + cmpld cr7, rWORD1, rWORD2 + ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + beq cr7, L(dureturn24) + li rRTN, 1 + ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(duLcr7): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr7, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr1): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr6): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr5): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + + .align 3 +L(duZeroReturn): + li rRTN, 0 + .align 4 +L(dureturn): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dureturn29): + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) +L(dureturn27): + ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) +L(dureturn24): + ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + blr + +L(duzeroLength): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +END (MEMCMP) +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp, bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcpy.S new file mode 100644 index 0000000000..e08993cbc3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcpy.S @@ -0,0 +1,430 @@ +/* Optimized memcpy implementation for PowerPC64/POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. */ + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +#define dst 11 /* Use r11 so r3 kept unchanged. */ +#define src 4 +#define cnt 5 + + .machine power7 +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + cmpldi cr1,cnt,31 + neg 0,3 + ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + +/* Align copies using VSX instructions to quadword. It is to avoid alignment + traps when memcpy is used on non-cacheable memory (for instance, memory + mapped I/O). */ + andi. 10,3,15 + clrldi 11,4,60 + cmpld cr6,10,11 /* SRC and DST alignments match? */ + + mr dst,3 + bne cr6,L(copy_GE_32_unaligned) + beq L(aligned_copy) + + mtocrf 0x01,0 + clrldi 0,0,60 + +/* Get the DST and SRC aligned to 16 bytes. */ +1: + bf 31,2f + lbz 6,0(src) + addi src,src,1 + stb 6,0(dst) + addi dst,dst,1 +2: + bf 30,4f + lhz 6,0(src) + addi src,src,2 + sth 6,0(dst) + addi dst,dst,2 +4: + bf 29,8f + lwz 6,0(src) + addi src,src,4 + stw 6,0(dst) + addi dst,dst,4 +8: + bf 28,16f + ld 6,0(src) + addi src,src,8 + std 6,0(dst) + addi dst,dst,8 +16: + subf cnt,0,cnt + +/* Main aligned copy loop. Copies 128 bytes at a time. */ +L(aligned_copy): + li 6,16 + li 7,32 + li 8,48 + mtocrf 0x02,cnt + srdi 12,cnt,7 + cmpdi 12,0 + beq L(aligned_tail) + lxvd2x 6,0,src + lxvd2x 7,src,6 + mtctr 12 + b L(aligned_128loop) + + .align 4 +L(aligned_128head): + /* for the 2nd + iteration of this loop. */ + lxvd2x 6,0,src + lxvd2x 7,src,6 +L(aligned_128loop): + lxvd2x 8,src,7 + lxvd2x 9,src,8 + stxvd2x 6,0,dst + addi src,src,64 + stxvd2x 7,dst,6 + stxvd2x 8,dst,7 + stxvd2x 9,dst,8 + lxvd2x 6,0,src + lxvd2x 7,src,6 + addi dst,dst,64 + lxvd2x 8,src,7 + lxvd2x 9,src,8 + addi src,src,64 + stxvd2x 6,0,dst + stxvd2x 7,dst,6 + stxvd2x 8,dst,7 + stxvd2x 9,dst,8 + addi dst,dst,64 + bdnz L(aligned_128head) + +L(aligned_tail): + mtocrf 0x01,cnt + bf 25,32f + lxvd2x 6,0,src + lxvd2x 7,src,6 + lxvd2x 8,src,7 + lxvd2x 9,src,8 + addi src,src,64 + stxvd2x 6,0,dst + stxvd2x 7,dst,6 + stxvd2x 8,dst,7 + stxvd2x 9,dst,8 + addi dst,dst,64 +32: + bf 26,16f + lxvd2x 6,0,src + lxvd2x 7,src,6 + addi src,src,32 + stxvd2x 6,0,dst + stxvd2x 7,dst,6 + addi dst,dst,32 +16: + bf 27,8f + lxvd2x 6,0,src + addi src,src,16 + stxvd2x 6,0,dst + addi dst,dst,16 +8: + bf 28,4f + ld 6,0(src) + addi src,src,8 + std 6,0(dst) + addi dst,dst,8 +4: /* Copies 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(src) + stw 6,0(dst) + bf 30,L(tail5) + lhz 7,4(src) + sth 7,4(dst) + bflr 31 + lbz 8,6(src) + stb 8,6(dst) + /* Return original DST pointer. */ + blr + + +/* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + mr dst,3 + cmpldi cr6,cnt,8 + mtocrf 0x01,cnt + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + andi. 0,8,3 + cmpldi cr1,cnt,16 + beq L(copy_LT_32_aligned) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,0 + subf cnt,0,cnt +2: + bf 30,1f + lhz 6,0(src) + addi src,src,2 + sth 6,0(dst) + addi dst,dst,2 +1: + bf 31,L(end_4bytes_alignment) + lbz 6,0(src) + addi src,src,1 + stb 6,0(dst) + addi dst,dst,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,cnt,16 + mtocrf 0x01,cnt + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. 
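+	   The four word loads and stores below are interleaved so that a
+	   store can issue while the following load is still in flight.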
*/ + lwz 6,0(src) + lwz 7,4(src) + stw 6,0(dst) + lwz 8,8(src) + stw 7,4(dst) + lwz 6,12(src) + addi src,src,16 + stw 8,8(dst) + stw 6,12(dst) + addi dst,dst,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4) + lwz 6,0(src) + lwz 7,4(src) + addi src,src,8 + stw 6,0(dst) + stw 7,4(dst) + addi dst,dst,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + lwz 6,0(src) + stw 6,0(dst) + bf 30,L(tail5) + lhz 7,4(src) + sth 7,4(dst) + bflr 31 + lbz 8,6(src) + stb 8,6(dst) + /* Return original DST pointer. */ + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + lhz 6,0(src) + sth 6,0(dst) + bflr 31 + lbz 7,2(src) + stb 7,2(dst) + blr + + .align 4 +L(tail5): + bflr 31 + lbz 6,4(src) + stb 6,4(dst) + blr + + .align 4 +1: + bflr 31 + lbz 6,0(src) + stb 6,0(dst) + /* Return original DST pointer. */ + blr + + +/* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,L(tail4) + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + + lwz 6,0(src) + lwz 7,4(src) + stw 6,0(dst) + stw 7,4(dst) + blr + + +/* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + clrldi 0,0,60 /* Number of bytes until the 1st dst quadword. */ + srdi 9,cnt,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* DST is not quadword aligned, get it aligned. */ + + mtocrf 0x01,0 + subf cnt,0,cnt + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: + bf 31,2f + lbz 6,0(src) + addi src,src,1 + stb 6,0(dst) + addi dst,dst,1 +2: + bf 30,4f + lhz 6,0(src) + addi src,src,2 + sth 6,0(dst) + addi dst,dst,2 +4: + bf 29,8f + lwz 6,0(src) + addi src,src,4 + stw 6,0(dst) + addi dst,dst,4 +8: + bf 28,0f + ld 6,0(src) + addi src,src,8 + std 6,0(dst) + addi dst,dst,8 +0: + srdi 9,cnt,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi 10,cnt,60 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmpldi cr1,10,0 + srdi 8,cnt,5 /* Setup the loop counter. */ + mtocrf 0x01,9 + cmpldi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,src +#else + lvsl 5,0,src +#endif + lvx 3,0,src + li 0,0 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop. */ + lvx 4,src,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi src,src,16 + stvx 6,0,dst + addi dst,dst,16 + vor 3,4,4 + clrrdi 0,src,60 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,src,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,src,7 +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi src,src,32 + stvx 6,0,dst + stvx 10,dst,6 + addi dst,dst,32 + bdnz L(unaligned_loop) + + clrrdi 0,src,60 + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. 
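+	   At most 15 bytes remain.  In rough C terms, the CR bits loaded
+	   from cnt select the tail copies (a sketch, not the literal code;
+	   copy_n stands in for the inline load/store pairs below):
+
+	     if (cnt & 8) copy_n (dst, src, 8);
+	     if (cnt & 4) copy_n (dst, src, 4);
+	     if (cnt & 2) copy_n (dst, src, 2);
+	     if (cnt & 1) copy_n (dst, src, 1);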
*/
+	mtocrf	0x01,cnt
+	beqlr	cr1
+
+	add	src,src,0
+
+	/* We have 1~15 tail bytes to copy, and DST is quadword aligned.  */
+	/* Copy 8 bytes.  */
+	bf	28,4f
+	lwz	6,0(src)
+	lwz	7,4(src)
+	addi	src,src,8
+	stw	6,0(dst)
+	stw	7,4(dst)
+	addi	dst,dst,8
+4:	/* Copy 4~7 bytes.  */
+	bf	29,L(tail2)
+	lwz	6,0(src)
+	stw	6,0(dst)
+	bf	30,L(tail5)
+	lhz	7,4(src)
+	sth	7,4(dst)
+	bflr	31
+	lbz	8,6(src)
+	stb	8,6(dst)
+	/* Return original DST pointer.  */
+	blr
+
+END_GEN_TB (MEMCPY,TB_TOCLESS)
+libc_hidden_builtin_def (memcpy)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memmove.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memmove.S
new file mode 100644
index 0000000000..4c0f7c3571
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memmove.S
@@ -0,0 +1,835 @@
+/* Optimized memmove implementation for PowerPC64/POWER7.
+   Copyright (C) 2014-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+
+/* void* [r3] memmove (void *dest [r3], const void *src [r4], size_t len [r5])
+
+   This implementation checks whether 'dest' overlaps 'src'.  If it
+   does not, an optimized forward copy is used (similar to the POWER7
+   memcpy, embedded here to save some cycles).  If source and
+   destination do overlap, an optimized backward copy is used
+   instead.  */
+
+#ifndef MEMMOVE
+# define MEMMOVE memmove
+#endif
+	.machine power7
+EALIGN (MEMMOVE, 5, 0)
+	CALL_MCOUNT 3
+
+L(_memmove):
+	subf	r9,r4,r3
+	cmpld	cr7,r9,r5
+	blt	cr7,L(memmove_bwd)
+
+	cmpldi	cr1,r5,31
+	neg	0,3
+	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
+				       code.  */
+
+	andi.	10,3,15
+	clrldi	11,4,60
+	cmpld	cr6,10,11	/* SRC and DST alignments match?  */
+
+	mr	r11,3
+	bne	cr6,L(copy_GE_32_unaligned)
+	beq	L(aligned_copy)
+
+	mtocrf	0x01,0
+	clrldi	0,0,60
+
+/* Get the DST and SRC aligned to 8 bytes (16 for little-endian).  */
+1:
+	bf	31,2f
+	lbz	6,0(r4)
+	addi	r4,r4,1
+	stb	6,0(r11)
+	addi	r11,r11,1
+2:
+	bf	30,4f
+	lhz	6,0(r4)
+	addi	r4,r4,2
+	sth	6,0(r11)
+	addi	r11,r11,2
+4:
+	bf	29,8f
+	lwz	6,0(r4)
+	addi	r4,r4,4
+	stw	6,0(r11)
+	addi	r11,r11,4
+8:
+	bf	28,16f
+	ld	6,0(r4)
+	addi	r4,r4,8
+	std	6,0(r11)
+	addi	r11,r11,8
+16:
+	subf	r5,0,r5
+
+/* Main aligned copy loop.  Copies 128 bytes at a time.  */
+L(aligned_copy):
+	li	6,16
+	li	7,32
+	li	8,48
+	mtocrf	0x02,r5
+	srdi	12,r5,7
+	cmpdi	12,0
+	beq	L(aligned_tail)
+	lxvd2x	6,0,r4
+	lxvd2x	7,r4,6
+	mtctr	12
+	b	L(aligned_128loop)
+
+	.align	4
+L(aligned_128head):
+	/* Reload the source doublewords for the 2nd and subsequent
+	   iterations of this loop.
*/ + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 +L(aligned_128loop): + lxvd2x 8,r4,7 + lxvd2x 9,r4,8 + stxvd2x 6,0,r11 + addi r4,r4,64 + stxvd2x 7,r11,6 + stxvd2x 8,r11,7 + stxvd2x 9,r11,8 + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + addi r11,r11,64 + lxvd2x 8,r4,7 + lxvd2x 9,r4,8 + addi r4,r4,64 + stxvd2x 6,0,r11 + stxvd2x 7,r11,6 + stxvd2x 8,r11,7 + stxvd2x 9,r11,8 + addi r11,r11,64 + bdnz L(aligned_128head) + +L(aligned_tail): + mtocrf 0x01,r5 + bf 25,32f + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + lxvd2x 8,r4,7 + lxvd2x 9,r4,8 + addi r4,r4,64 + stxvd2x 6,0,r11 + stxvd2x 7,r11,6 + stxvd2x 8,r11,7 + stxvd2x 9,r11,8 + addi r11,r11,64 +32: + bf 26,16f + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + addi r4,r4,32 + stxvd2x 6,0,r11 + stxvd2x 7,r11,6 + addi r11,r11,32 +16: + bf 27,8f + lxvd2x 6,0,r4 + addi r4,r4,16 + stxvd2x 6,0,r11 + addi r11,r11,16 +8: + bf 28,4f + ld 6,0(r4) + addi r4,r4,8 + std 6,0(r11) + addi r11,r11,8 +4: /* Copies 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(r4) + stw 6,0(r11) + bf 30,L(tail5) + lhz 7,4(r4) + sth 7,4(r11) + bflr 31 + lbz 8,6(r4) + stb 8,6(r11) + /* Return original DST pointer. */ + blr + +/* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + mr r11,3 + cmpldi cr6,r5,8 + mtocrf 0x01,r5 + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + andi. 0,8,3 + cmpldi cr1,r5,16 + beq L(copy_LT_32_aligned) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,0 + subf r5,0,r5 +2: + bf 30,1f + lhz 6,0(r4) + addi r4,r4,2 + sth 6,0(r11) + addi r11,r11,2 +1: + bf 31,L(end_4bytes_alignment) + lbz 6,0(r4) + addi r4,r4,1 + stb 6,0(r11) + addi r11,r11,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,r5,16 + mtocrf 0x01,r5 + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(r4) + lwz 7,4(r4) + stw 6,0(r11) + lwz 8,8(r4) + stw 7,4(r11) + lwz 6,12(r4) + addi r4,r4,16 + stw 8,8(r11) + stw 6,12(r11) + addi r11,r11,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4) + lwz 6,0(r4) + lwz 7,4(r4) + addi r4,r4,8 + stw 6,0(r11) + stw 7,4(r11) + addi r11,r11,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + lwz 6,0(r4) + stw 6,0(r11) + bf 30,L(tail5) + lhz 7,4(r4) + sth 7,4(r11) + bflr 31 + lbz 8,6(r4) + stb 8,6(r11) + /* Return original DST pointer. */ + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + lhz 6,0(r4) + sth 6,0(r11) + bflr 31 + lbz 7,2(r4) + stb 7,2(r11) + blr + + .align 4 +L(tail5): + bflr 31 + lbz 6,4(r4) + stb 6,4(r11) + blr + + .align 4 +1: + bflr 31 + lbz 6,0(r4) + stb 6,0(r11) + /* Return original DST pointer. */ + blr + +/* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,L(tail4) + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + + lwz 6,0(r4) + lwz 7,4(r4) + stw 6,0(r11) + stw 7,4(r11) + blr + + +/* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + clrldi 0,0,60 /* Number of bytes until the 1st r11 quadword. */ + srdi 9,r5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* DST is not quadword aligned, get it aligned. */ + + mtocrf 0x01,0 + subf r5,0,r5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. 
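In C terms, this 0~15 byte prologue tests the individual bits of the padding count (the distance to the next 16-byte boundary, computed via neg/clrldi and moved into the CR with mtocrf) and copies 1, 2, 4 and 8 bytes accordingly; that is what the bf 31/30/29/28 ladder below implements. A sketch under those assumptions (not the glibc source):

#include <stdint.h>
#include <string.h>

static size_t align_dst16 (unsigned char **dst, const unsigned char **src)
{
  size_t pad = (size_t) (-(uintptr_t) *dst & 15);  // bytes to next 16-byte boundary
  if (pad & 1) { **dst = **src; *dst += 1; *src += 1; }           // bf 31
  if (pad & 2) { memcpy (*dst, *src, 2); *dst += 2; *src += 2; }  // bf 30
  if (pad & 4) { memcpy (*dst, *src, 4); *dst += 4; *src += 4; }  // bf 29
  if (pad & 8) { memcpy (*dst, *src, 8); *dst += 8; *src += 8; }  // bf 28
  return pad;   // the caller subtracts this from the remaining length
}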
*/ +1: + bf 31,2f + lbz 6,0(r4) + addi r4,r4,1 + stb 6,0(r11) + addi r11,r11,1 +2: + bf 30,4f + lhz 6,0(r4) + addi r4,r4,2 + sth 6,0(r11) + addi r11,r11,2 +4: + bf 29,8f + lwz 6,0(r4) + addi r4,r4,4 + stw 6,0(r11) + addi r11,r11,4 +8: + bf 28,0f + ld 6,0(r4) + addi r4,r4,8 + std 6,0(r11) + addi r11,r11,8 +0: + srdi 9,r5,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi 10,r5,60 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmpldi cr1,10,0 + srdi 8,r5,5 /* Setup the loop counter. */ + mtocrf 0x01,9 + cmpldi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,r4 +#else + lvsl 5,0,r4 +#endif + lvx 3,0,r4 + li 0,0 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop. */ + lvx 4,r4,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi r4,r4,16 + stvx 6,0,r11 + addi r11,r11,16 + vor 3,4,4 + clrrdi 0,r4,60 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,r4,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,r4,7 +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi r4,r4,32 + stvx 6,0,r11 + stvx 10,r11,6 + addi r11,r11,32 + bdnz L(unaligned_loop) + + clrrdi 0,r4,60 + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + mtocrf 0x01,r5 + beqlr cr1 + + add r4,r4,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ + /* Copy 8 bytes. */ + bf 28,4f + lwz 6,0(r4) + lwz 7,4(r4) + addi r4,r4,8 + stw 6,0(r11) + stw 7,4(r11) + addi r11,r11,8 +4: /* Copy 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(r4) + stw 6,0(r11) + bf 30,L(tail5) + lhz 7,4(r4) + sth 7,4(r11) + bflr 31 + lbz 8,6(r4) + stb 8,6(r11) + /* Return original DST pointer. */ + blr + + /* Start of the backward memcpy implementation: the algorithm first checks + if src and dest have the same alignment and, if they do, aligns both to + 16 bytes and copies using VSX instructions. + If they do not, it aligns dest to 16 bytes and uses VMX (altivec) + instructions to read two 16-byte chunks at a time, shift/permute the + bytes read, and write them aligned to dest. */ +L(memmove_bwd): + cmpldi cr1,r5,31 + /* Copy is done backwards: update the pointers and check alignment. */ + add r11,r3,r5 + add r4,r4,r5 + mr r0,r11 + ble cr1, L(copy_LT_32_bwd) /* If move < 32 bytes use short move + code. */ + + andi. r10,r11,15 /* Check if r11 is aligned to 16 bytes */ + clrldi r9,r4,60 /* Check if r4 is aligned to 16 bytes */ + cmpld cr6,r10,r9 /* SRC and DST alignments match? */ + + bne cr6,L(copy_GE_32_unaligned_bwd) + beq L(aligned_copy_bwd) + + mtocrf 0x01,r0 + clrldi r0,r0,60 + +/* Get the DST and SRC aligned to 16 bytes.
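The backward path keeps the same structure as the forward one, but both pointers are first moved one element past the end ("add r11,r3,r5; add r4,r4,r5" above) and every step walks down with negative offsets. Byte-granular C sketch of the idea (illustrative only):

#include <stddef.h>

// dst_end/src_end point one past the last byte of each region.
static void copy_bwd_bytes (unsigned char *dst_end,
                            const unsigned char *src_end, size_t len)
{
  while (len--)
    *--dst_end = *--src_end;   // safe when dst overlaps the tail of src
}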
*/ +1: + bf 31,2f + lbz r6,-1(r4) + subi r4,r4,1 + stb r6,-1(r11) + subi r11,r11,1 +2: + bf 30,4f + lhz r6,-2(r4) + subi r4,r4,2 + sth r6,-2(r11) + subi r11,r11,2 +4: + bf 29,8f + lwz r6,-4(r4) + subi r4,r4,4 + stw r6,-4(r11) + subi r11,r11,4 +8: + bf 28,16f + ld r6,-8(r4) + subi r4,r4,8 + std r6,-8(r11) + subi r11,r11,8 +16: + subf r5,0,r5 + +/* Main aligned copy loop. Copies 128 bytes at a time. */ +L(aligned_copy_bwd): + li r6,-16 + li r7,-32 + li r8,-48 + li r9,-64 + mtocrf 0x02,r5 + srdi r12,r5,7 + cmpdi r12,0 + beq L(aligned_tail_bwd) + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 + mtctr 12 + b L(aligned_128loop_bwd) + + .align 4 +L(aligned_128head_bwd): + /* for the 2nd + iteration of this loop. */ + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 +L(aligned_128loop_bwd): + lxvd2x v8,r4,r8 + lxvd2x v9,r4,r9 + stxvd2x v6,r11,r6 + subi r4,r4,64 + stxvd2x v7,r11,r7 + stxvd2x v8,r11,r8 + stxvd2x v9,r11,r9 + lxvd2x v6,r4,r6 + lxvd2x v7,r4,7 + subi r11,r11,64 + lxvd2x v8,r4,r8 + lxvd2x v9,r4,r9 + subi r4,r4,64 + stxvd2x v6,r11,r6 + stxvd2x v7,r11,r7 + stxvd2x v8,r11,r8 + stxvd2x v9,r11,r9 + subi r11,r11,64 + bdnz L(aligned_128head_bwd) + +L(aligned_tail_bwd): + mtocrf 0x01,r5 + bf 25,32f + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 + lxvd2x v8,r4,r8 + lxvd2x v9,r4,r9 + subi r4,r4,64 + stxvd2x v6,r11,r6 + stxvd2x v7,r11,r7 + stxvd2x v8,r11,r8 + stxvd2x v9,r11,r9 + subi r11,r11,64 +32: + bf 26,16f + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 + subi r4,r4,32 + stxvd2x v6,r11,r6 + stxvd2x v7,r11,r7 + subi r11,r11,32 +16: + bf 27,8f + lxvd2x v6,r4,r6 + subi r4,r4,16 + stxvd2x v6,r11,r6 + subi r11,r11,16 +8: + bf 28,4f + ld r6,-8(r4) + subi r4,r4,8 + std r6,-8(r11) + subi r11,r11,8 +4: /* Copies 4~7 bytes. */ + bf 29,L(tail2_bwd) + lwz r6,-4(r4) + stw r6,-4(r11) + bf 30,L(tail5_bwd) + lhz r7,-6(r4) + sth r7,-6(r11) + bflr 31 + lbz r8,-7(r4) + stb r8,-7(r11) + /* Return original DST pointer. */ + blr + +/* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32_bwd): + cmpldi cr6,r5,8 + mtocrf 0x01,r5 + ble cr6,L(copy_LE_8_bwd) + + /* At least 9 bytes to go. */ + neg r8,r4 + andi. r0,r8,3 + cmpldi cr1,r5,16 + beq L(copy_LT_32_aligned_bwd) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,0 + subf r5,0,r5 +2: + bf 30,1f + lhz r6,-2(r4) + subi r4,r4,2 + sth r6,-2(r11) + subi r11,r11,2 +1: + bf 31,L(end_4bytes_alignment_bwd) + lbz 6,-1(r4) + subi r4,r4,1 + stb 6,-1(r11) + subi r11,r11,1 + + .align 4 +L(end_4bytes_alignment_bwd): + cmpldi cr1,r5,16 + mtocrf 0x01,r5 + +L(copy_LT_32_aligned_bwd): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz r6,-4(r4) + lwz r7,-8(r4) + stw r6,-4(r11) + lwz r8,-12(r4) + stw r7,-8(r11) + lwz r6,-16(r4) + subi r4,r4,16 + stw r8,-12(r11) + stw r6,-16(r11) + subi r11,r11,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4_bwd) + lwz r6,-4(r4) + lwz r7,-8(r4) + subi r4,r4,8 + stw r6,-4(r11) + stw r7,-8(r11) + subi r11,r11,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4_bwd): + bf 29,L(tail2_bwd) + lwz 6,-4(r4) + stw 6,-4(r11) + bf 30,L(tail5_bwd) + lhz 7,-6(r4) + sth 7,-6(r11) + bflr 31 + lbz 8,-7(r4) + stb 8,-7(r11) + /* Return original DST pointer. */ + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2_bwd): + bf 30,1f + lhz 6,-2(r4) + sth 6,-2(r11) + bflr 31 + lbz 7,-3(r4) + stb 7,-3(r11) + blr + + .align 4 +L(tail5_bwd): + bflr 31 + lbz 6,-5(r4) + stb 6,-5(r11) + blr + + .align 4 +1: + bflr 31 + lbz 6,-1(r4) + stb 6,-1(r11) + /* Return original DST pointer. */ + blr + + +/* Handles copies of 0~8 bytes. 
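The L(copy_LE_8*) blocks take this branch only when the earlier "cmpldi cr6,r5,8" found exactly 8 bytes; anything shorter goes to the tail code instead. The 8-byte case is done as two 4-byte moves because misaligned 8-byte ld/std is slow on this core. Conceptually (a sketch, not the source):

#include <stdint.h>
#include <string.h>

static void copy_exactly_8 (void *dst, const void *src)
{
  uint32_t lo, hi;
  memcpy (&lo, src, 4);                      // first lwz
  memcpy (&hi, (const char *) src + 4, 4);   // second lwz
  memcpy (dst, &lo, 4);                      // first stw
  memcpy ((char *) dst + 4, &hi, 4);         // second stw
}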
*/ + .align 4 +L(copy_LE_8_bwd): + bne cr6,L(tail4_bwd) + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + lwz 6,-8(r4) + lwz 7,-4(r4) + stw 6,-8(r11) + stw 7,-4(r11) + blr + + +/* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned_bwd): + andi. r10,r11,15 /* Check alignment of DST against 16 bytes. */ + srdi r9,r5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont_bwd) + + /* DST is not quadword aligned and r10 holds the address masked to + compare alignments. */ + mtocrf 0x01,r10 + subf r5,r10,r5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: + bf 31,2f + lbz r6,-1(r4) + subi r4,r4,1 + stb r6,-1(r11) + subi r11,r11,1 +2: + bf 30,4f + lhz r6,-2(r4) + subi r4,r4,2 + sth r6,-2(r11) + subi r11,r11,2 +4: + bf 29,8f + lwz r6,-4(r4) + subi r4,r4,4 + stw r6,-4(r11) + subi r11,r11,4 +8: + bf 28,0f + ld r6,-8(r4) + subi r4,r4,8 + std r6,-8(r11) + subi r11,r11,8 +0: + srdi r9,r5,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont_bwd): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi r10,r5,60 + li r6,-16 /* Index for 16-bytes offsets. */ + li r7,-32 /* Index for 32-bytes offsets. */ + cmpldi cr1,10,0 + srdi r8,r5,5 /* Setup the loop counter. */ + mtocrf 0x01,9 + cmpldi cr6,r9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr v5,r0,r4 +#else + lvsl v5,r0,r4 +#endif + lvx v3,0,r4 + li r0,0 + bf 31,L(setup_unaligned_loop_bwd) + + /* Copy another 16 bytes to align to 32-bytes due to the loop. */ + lvx v4,r4,r6 +#ifdef __LITTLE_ENDIAN__ + vperm v6,v3,v4,v5 +#else + vperm v6,v4,v3,v5 +#endif + subi r4,r4,16 + stvx v6,r11,r6 + subi r11,r11,16 + vor v3,v4,v4 + clrrdi r0,r4,60 + +L(setup_unaligned_loop_bwd): + mtctr r8 + ble cr6,L(end_unaligned_loop_bwd) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop_bwd): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx v4,r4,r6 +#ifdef __LITTLE_ENDIAN__ + vperm v6,v3,v4,v5 +#else + vperm v6,v4,v3,v5 +#endif + lvx v3,r4,r7 +#ifdef __LITTLE_ENDIAN__ + vperm v10,v4,v3,v5 +#else + vperm v10,v3,v4,v5 +#endif + subi r4,r4,32 + stvx v6,r11,r6 + stvx v10,r11,r7 + subi r11,r11,32 + bdnz L(unaligned_loop_bwd) + + clrrdi r0,r4,60 + + .align 4 +L(end_unaligned_loop_bwd): + + /* Check for tail bytes. */ + mtocrf 0x01,r5 + beqlr cr1 + + add r4,r4,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ + /* Copy 8 bytes. */ + bf 28,4f + lwz r6,-4(r4) + lwz r7,-8(r4) + subi r4,r4,8 + stw r6,-4(r11) + stw r7,-8(r11) + subi r11,r11,8 +4: /* Copy 4~7 bytes. */ + bf 29,L(tail2_bwd) + lwz r6,-4(r4) + stw r6,-4(r11) + bf 30,L(tail5_bwd) + lhz r7,-6(r4) + sth r7,-6(r11) + bflr 31 + lbz r8,-7(r4) + stb r8,-7(r11) + /* Return original DST pointer. */ + blr +END_GEN_TB (MEMMOVE, TB_TOCLESS) +libc_hidden_builtin_def (memmove) + + +/* void bcopy(const void *src [r3], void *dest [r4], size_t n [r5]) + Implemented in this file to avoid the linker creating a stub call + for the branch to '_memmove'.
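bcopy takes its arguments in the opposite order from memmove; the stub below just swaps r3 and r4 and branches to L(_memmove). C equivalent of what the stub computes:

#include <string.h>

void bcopy_sketch (const void *src, void *dst, size_t n)
{
  memmove (dst, src, n);   // same operation, (dst, src) order swapped
}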
*/ +ENTRY (__bcopy) + mr r6,r3 + mr r3,r4 + mr r4,r6 + b L(_memmove) +END (__bcopy) +weak_alias (__bcopy, bcopy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/mempcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/mempcpy.S new file mode 100644 index 0000000000..4e15d1e40c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/mempcpy.S @@ -0,0 +1,472 @@ +/* Optimized mempcpy implementation for POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + +/* __ptr_t [r3] __mempcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst' + 'len'. */ + +#ifndef MEMPCPY +# define MEMPCPY __mempcpy +#endif + .machine power7 +EALIGN (MEMPCPY, 5, 0) + CALL_MCOUNT 3 + + cmpldi cr1,5,31 + neg 0,3 + std 3,-16(1) + std 31,-8(1) + cfi_offset(31,-8) + ble cr1,L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + + andi. 11,3,7 /* Check alignment of DST. */ + + + clrldi 10,4,61 /* Check alignment of SRC. */ + cmpld cr6,10,11 /* SRC and DST alignments match? */ + mr 12,4 + mr 31,5 + bne cr6,L(copy_GE_32_unaligned) + + srdi 9,5,3 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_aligned_cont) + + clrldi 0,0,61 + mtcrf 0x01,0 + subf 31,0,5 + + /* Get the SRC aligned to 8 bytes. */ + +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,4f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: bf 29,0f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +0: + clrldi 10,12,61 /* Check alignment of SRC again. */ + srdi 9,31,3 /* Number of full doublewords remaining. */ + +L(copy_GE_32_aligned_cont): + + clrldi 11,31,61 + mtcrf 0x01,9 + + srdi 8,31,5 + cmpldi cr1,9,4 + cmpldi cr6,11,0 + mr 11,12 + + /* Copy 1~3 doublewords so the main loop starts + at a multiple of 32 bytes. */ + + bf 30,1f + ld 6,0(12) + ld 7,8(12) + addi 11,12,16 + mtctr 8 + std 6,0(3) + std 7,8(3) + addi 10,3,16 + bf 31,4f + ld 0,16(12) + std 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 + b 4f + + .align 4 +1: /* Copy 1 doubleword and set the counter. */ + mr 10,3 + mtctr 8 + bf 31,4f + ld 6,0(12) + addi 11,12,8 + std 6,0(3) + addi 10,3,8 + + /* Main aligned copy loop. Copies 32-bytes at a time. */ + .align 4 +4: + ld 6,0(11) + ld 7,8(11) + ld 8,16(11) + ld 0,24(11) + addi 11,11,32 + + std 6,0(10) + std 7,8(10) + std 8,16(10) + std 0,24(10) + addi 10,10,32 + bdnz 4b +3: + + /* Check for tail bytes. */ + rldicr 0,31,0,60 + mtcrf 0x01,31 + beq cr6,0f + +.L9: + add 3,3,0 + add 12,12,0 + + /* At this point we have a tail of 0-7 bytes and we know that the + destination is doubleword-aligned. */ +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2 bytes. 
*/ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + ld 31,-8(1) + ld 3,-16(1) + add 3,3,5 + blr + + /* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + cmpldi cr6,5,8 + mr 12,4 + mtcrf 0x01,5 + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + clrrdi 11,4,2 + andi. 0,8,3 + cmpldi cr1,5,16 + mr 10,5 + beq L(copy_LT_32_aligned) + + /* Force 4-bytes alignment for SRC. */ + mtocrf 0x01,0 + subf 10,0,5 +2: bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,L(end_4bytes_alignment) + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,10,16 + mtcrf 0x01,10 + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 8,8(12) + stw 7,4(3) + lwz 6,12(12) + addi 12,12,16 + stw 8,8(3) + stw 6,12(3) + addi 3,3,16 +8: /* Copy 8 bytes. */ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2-3 bytes. */ + bf 30,1f + + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + ld 3,-16(1) + add 3,3,5 + blr + + .align 4 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + ld 3,-16(1) + add 3,3,5 + blr + + /* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,4f + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + ld 3,-16(1) /* Return DST + LEN pointer. */ + add 3,3,5 + blr + + .align 4 +4: /* Copies 4~7 bytes. */ + bf 29,2b + + lwz 6,0(4) + stw 6,0(3) + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + ld 3,-16(1) + add 3,3,5 + blr + + .align 4 +5: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,4(4) + stb 6,4(3) + +0: /* Return DST + LEN pointer. */ + ld 3,-16(1) + add 3,3,5 + blr + + /* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + clrldi 0,0,60 /* Number of bytes until the 1st + quadword. */ + andi. 11,3,15 /* Check alignment of DST (against + quadwords). */ + srdi 9,5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* SRC is not quadword aligned, get it aligned. */ + + mtcrf 0x01,0 + subf 31,0,5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: /* Copy 1 byte. */ + bf 31,2f + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: /* Copy 2 bytes. */ + bf 30,4f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: /* Copy 4 bytes. */ + bf 29,8f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +8: /* Copy 8 bytes. */ + bf 28,0f + + ld 6,0(12) + addi 12,12,8 + std 6,0(3) + addi 3,3,8 +0: + clrldi 10,12,60 /* Check alignment of SRC. */ + srdi 9,31,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi 11,31,60 + li 6,16 /* Index for 16-bytes offsets. 
*/ + li 7,32 /* Index for 32-bytes offsets. */ + cmpldi cr1,11,0 + srdi 8,31,5 /* Setup the loop counter. */ + mr 10,3 + mr 11,12 + mtcrf 0x01,9 + cmpldi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,12 +#else + lvsl 5,0,12 +#endif + lvx 3,0,12 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop . */ + lvx 4,12,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi 11,12,16 + addi 10,3,16 + stvx 6,0,3 + vor 3,4,4 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,11,6 /* vr4 = r11+16. */ +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,11,7 /* vr3 = r11+32. */ +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi 11,11,32 + stvx 6,0,10 + stvx 10,10,6 + addi 10,10,32 + + bdnz L(unaligned_loop) + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + rldicr 0,31,0,59 + mtcrf 0x01,31 + beq cr1,0f + + add 3,3,0 + add 12,12,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ +8: /* Copy 8 bytes. */ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2~3 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + ld 31,-8(1) + ld 3,-16(1) + add 3,3,5 + blr + +END_GEN_TB (MEMPCPY,TB_TOCLESS) +libc_hidden_def (__mempcpy) +weak_alias (__mempcpy, mempcpy) +libc_hidden_builtin_def (mempcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memrchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memrchr.S new file mode 100644 index 0000000000..4276768915 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memrchr.S @@ -0,0 +1,201 @@ +/* Optimized memrchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5]) */ + +#ifndef MEMRCHR +# define MEMRCHR __memrchr +#endif + .machine power7 +ENTRY (MEMRCHR) + CALL_MCOUNT 3 + add r7,r3,r5 /* Calculate the last acceptable address. */ + neg r0,r7 + addi r7,r7,-1 + mr r10,r3 + clrrdi r6,r7,7 + li r9,3<<5 + dcbt r9,r6,8 /* Stream hint, decreasing addresses. 
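memrchr scans from the end of the buffer toward the start; the code below does it a doubleword at a time with cmpb (plus the dcbt stream hint for decreasing addresses), but the contract is just the mirror image of memchr. Byte-wise reference form (the real code is word-parallel):

#include <stddef.h>

void *memrchr_sketch (const void *s, int c, size_t n)
{
  const unsigned char *p = (const unsigned char *) s + n;
  while (n--)
    if (*--p == (unsigned char) c)
      return (void *) p;   // first occurrence seen from the end
  return NULL;
}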
*/ + + /* Replicate BYTE to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + li r6,-8 + li r9,-1 + rlwinm r0,r0,3,26,28 /* Calculate padding. */ + clrrdi r8,r7,3 + srd r9,r9,r0 + cmpldi r5,32 + clrrdi r0,r10,3 + ble L(small_range) + +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 +#else + ldbrx r12,0,r8 /* Load reversed doubleword from memory. */ +#endif + cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ + and r3,r3,r9 + cmpldi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + bf 28,L(loop_setup) + + /* Handle DWORD2 of pair. */ +#ifdef __LITTLE_ENDIAN__ + ldx r12,r8,r6 +#else + ldbrx r12,r8,r6 +#endif + addi r8,r8,-8 + cmpb r3,r12,r4 + cmpldi cr7,r3,0 + bne cr7,L(done) + +L(loop_setup): + /* The last dword we want to read in the loop below is the one + containing the first byte of the string, ie. the dword at + s & ~7, or r0. The first dword read is at r8 - 8, we + read 2 * cnt dwords, so the last dword read will be at + r8 - 8 - 16 * cnt + 8. Solving for cnt gives + cnt = (r8 - r0) / 16 */ + sub r5,r8,r0 + addi r8,r8,-8 + srdi r9,r5,4 /* Number of loop iterations. */ + mtctr r9 /* Setup the counter. */ + + /* Main loop to look for BYTE backwards in the string. + FIXME: Investigate whether 32 byte align helps with this + 9 instruction loop. */ + .align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 + ldx r11,r8,r6 +#else + ldbrx r12,0,r8 + ldbrx r11,r8,r6 +#endif + cmpb r3,r12,r4 + cmpb r9,r11,r4 + or r5,r9,r3 /* Merge everything in one doubleword. */ + cmpldi cr7,r5,0 + bne cr7,L(found) + addi r8,r8,-16 + bdnz L(loop) + + /* We may have one more word to read. */ + cmpld r8,r0 + bnelr + +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 +#else + ldbrx r12,0,r8 +#endif + cmpb r3,r12,r4 + cmpldi cr7,r3,0 + bne cr7,L(done) + blr + + .align 4 +L(found): + /* OK, one (or both) of the dwords contains BYTE. Check + the first dword. */ + cmpldi cr6,r3,0 + bne cr6,L(done) + + /* BYTE must be in the second word. Adjust the address + again and move the result of cmpb to r3 so we can calculate the + pointer. */ + + mr r3,r9 + addi r8,r8,-8 + + /* r3 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as BYTE in the original + word from the string. Use that to calculate the pointer. + We need to make sure BYTE is *before* the end of the + range. */ +L(done): + cntlzd r9,r3 /* Count leading zeros before the match. */ + cmpld r8,r0 /* Are we on the last word? */ + srdi r6,r9,3 /* Convert leading zeros to bytes. */ + addi r0,r6,-7 + sub r3,r8,r0 + cmpld cr7,r3,r10 + bnelr + bgelr cr7 + li r3,0 + blr + + .align 4 +L(null): + li r3,0 + blr + +/* Deals with size <= 32. */ + .align 4 +L(small_range): + cmpldi r5,0 + beq L(null) + +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 +#else + ldbrx r12,0,r8 /* Load reversed doubleword from memory. */ +#endif + cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ + and r3,r3,r9 + cmpldi cr7,r3,0 + bne cr7,L(done) + + /* Are we done already? 
*/ + cmpld r8,r0 + addi r8,r8,-8 + beqlr + + .align 5 +L(loop_small): +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 +#else + ldbrx r12,0,r8 +#endif + cmpb r3,r12,r4 + cmpld r8,r0 + cmpldi cr7,r3,0 + bne cr7,L(done) + addi r8,r8,-8 + bne L(loop_small) + blr + +END (MEMRCHR) +weak_alias (__memrchr, memrchr) +libc_hidden_builtin_def (memrchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memset.S new file mode 100644 index 0000000000..21933c0672 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memset.S @@ -0,0 +1,399 @@ +/* Optimized memset implementation for PowerPC64/POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + .machine power7 +EALIGN (MEMSET, 5, 0) + CALL_MCOUNT 3 + +L(_memset): + cmpldi cr7,5,31 + cmpldi cr6,5,8 + mr 10,3 + + /* Replicate byte to word. */ + insrdi 4,4,8,48 + insrdi 4,4,16,32 + ble cr6,L(small) /* If length <= 8, use short copy code. */ + + neg 0,3 + ble cr7,L(medium) /* If length < 32, use medium copy code. */ + + andi. 11,10,7 /* Check alignment of SRC. */ + insrdi 4,4,32,0 /* Replicate word to double word. */ + + mr 12,5 + beq L(big_aligned) + + clrldi 0,0,61 + mtocrf 0x01,0 + subf 5,0,5 + + /* Get DST aligned to 8 bytes. */ +1: bf 31,2f + + stb 4,0(10) + addi 10,10,1 +2: bf 30,4f + + sth 4,0(10) + addi 10,10,2 +4: bf 29,L(big_aligned) + + stw 4,0(10) + addi 10,10,4 + + .align 4 +L(big_aligned): + + cmpldi cr5,5,255 + li 0,32 + dcbtst 0,10 + cmpldi cr6,4,0 + srdi 9,5,3 /* Number of full doublewords remaining. */ + crand 27,26,21 + mtocrf 0x01,9 + bt 27,L(huge) + + /* From this point on, we'll copy 32+ bytes and the value + isn't 0 (so we can't use dcbz). */ + + srdi 8,5,5 + clrldi 11,5,61 + cmpldi cr6,11,0 + cmpldi cr1,9,4 + mtctr 8 + + /* Copy 1~3 doublewords so the main loop starts + at a multiple of 32 bytes. */ + + bf 30,1f + + std 4,0(10) + std 4,8(10) + addi 10,10,16 + bf 31,L(big_loop) + + std 4,0(10) + addi 10,10,8 + mr 12,10 + blt cr1,L(tail_bytes) + b L(big_loop) + + .align 4 +1: /* Copy 1 doubleword. */ + bf 31,L(big_loop) + + std 4,0(10) + addi 10,10,8 + + /* Main aligned copy loop. Copies 32-bytes at a time and + ping-pong through r10 and r12 to avoid AGEN delays. */ + .align 4 +L(big_loop): + addi 12,10,32 + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + bdz L(tail_bytes) + + addi 10,10,64 + std 4,0(12) + std 4,8(12) + std 4,16(12) + std 4,24(12) + bdnz L(big_loop) + + mr 12,10 + b L(tail_bytes) + + .align 4 +L(tail_bytes): + + /* Check for tail bytes. 
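At entry, the insrdi chain replicates the fill byte into all eight bytes of r4 so the loops can store a full doubleword per std. A common C idiom for the same splat, shown only as an illustration of the effect (multiplication rather than the insert-bits instructions the assembly uses):

#include <stdint.h>

static uint64_t splat_byte (int c)
{
  // 0x01 in every byte lane, times the byte value, replicates it.
  return (uint64_t) (unsigned char) c * 0x0101010101010101ULL;
}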
*/ + beqlr cr6 + + clrldi 0,5,61 + mtocrf 0x01,0 + + /* At this point we have a tail of 0-7 bytes and we know that the + destination is doubleword-aligned. */ +4: /* Copy 4 bytes. */ + bf 29,2f + + stw 4,0(12) + addi 12,12,4 +2: /* Copy 2 bytes. */ + bf 30,1f + + sth 4,0(12) + addi 12,12,2 +1: /* Copy 1 byte. */ + bflr 31 + + stb 4,0(12) + blr + + /* Special case when value is 0 and we have a long length to deal + with. Use dcbz to zero out 128-bytes at a time. Before using + dcbz though, we need to get the destination 128-bytes aligned. */ + .align 4 +L(huge): + andi. 11,10,127 + neg 0,10 + beq L(huge_aligned) + + clrldi 0,0,57 + subf 5,0,5 + srdi 0,0,3 + mtocrf 0x01,0 + + /* Get DST aligned to 128 bytes. */ +8: bf 28,4f + + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + std 4,32(10) + std 4,40(10) + std 4,48(10) + std 4,56(10) + addi 10,10,64 + .align 4 +4: bf 29,2f + + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + addi 10,10,32 + .align 4 +2: bf 30,1f + + std 4,0(10) + std 4,8(10) + addi 10,10,16 + .align 4 +1: bf 31,L(huge_aligned) + + std 4,0(10) + addi 10,10,8 + + +L(huge_aligned): + srdi 8,5,7 + clrldi 11,5,57 + cmpldi cr6,11,0 + mtctr 8 + + .align 4 +L(huge_loop): + dcbz 0,10 + addi 10,10,128 + bdnz L(huge_loop) + + /* Check how many bytes are still left. */ + beqlr cr6 + + subf 9,3,10 + subf 5,9,12 + srdi 8,5,3 + cmpldi cr6,8,0 + mtocrf 0x01,8 + + /* We have a tail o 1~127 bytes. Copy up to 15 doublewords for + speed. We'll handle the resulting tail bytes later. */ + beq cr6,L(tail) + +8: bf 28,4f + + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + std 4,32(10) + std 4,40(10) + std 4,48(10) + std 4,56(10) + addi 10,10,64 + .align 4 +4: bf 29,2f + + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + addi 10,10,32 + .align 4 +2: bf 30,1f + + std 4,0(10) + std 4,8(10) + addi 10,10,16 + .align 4 +1: bf 31,L(tail) + + std 4,0(10) + addi 10,10,8 + + /* Handle the rest of the tail bytes here. */ +L(tail): + mtocrf 0x01,5 + + .align 4 +4: bf 29,2f + + stw 4,0(10) + addi 10,10,4 + .align 4 +2: bf 30,1f + + sth 4,0(10) + addi 10,10,2 + .align 4 +1: bflr 31 + + stb 4,0(10) + blr + + /* Expanded tree to copy tail bytes without increments. */ + .align 4 +L(copy_tail): + bf 29,L(FXX) + + stw 4,0(10) + bf 30,L(TFX) + + sth 4,4(10) + bflr 31 + + stb 4,6(10) + blr + + .align 4 +L(FXX): bf 30,L(FFX) + + sth 4,0(10) + bflr 31 + + stb 4,2(10) + blr + + .align 4 +L(TFX): bflr 31 + + stb 4,4(10) + blr + + .align 4 +L(FFX): bflr 31 + + stb 4,0(10) + blr + + /* Handle copies of 9~31 bytes. */ + .align 4 +L(medium): + /* At least 9 bytes to go. */ + andi. 11,10,3 + clrldi 0,0,62 + beq L(medium_aligned) + + /* Force 4-bytes alignment for DST. */ + mtocrf 0x01,0 + subf 5,0,5 +1: /* Copy 1 byte. */ + bf 31,2f + + stb 4,0(10) + addi 10,10,1 +2: /* Copy 2 bytes. */ + bf 30,L(medium_aligned) + + sth 4,0(10) + addi 10,10,2 + + .align 4 +L(medium_aligned): + /* At least 6 bytes to go, and DST is word-aligned. */ + cmpldi cr1,5,16 + mtocrf 0x01,5 + blt cr1,8f + + /* Copy 16 bytes. */ + stw 4,0(10) + stw 4,4(10) + stw 4,8(10) + stw 4,12(10) + addi 10,10,16 +8: /* Copy 8 bytes. */ + bf 28,4f + + stw 4,0(10) + stw 4,4(10) + addi 10,10,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + stw 4,0(10) + addi 10,10,4 +2: /* Copy 2-3 bytes. */ + bf 30,1f + + sth 4,0(10) + addi 10,10,2 +1: /* Copy 1 byte. */ + bflr 31 + + stb 4,0(10) + blr + + /* Handles copies of 0~8 bytes. 
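The L(huge) path above is worth restating: it is taken only when the fill value is zero and the length is large (the crand of the two compares), because dcbz can zero a whole 128-byte cache line without reading it first. A C-shaped sketch of that control flow, with memset standing in for dcbz since the instruction has no C equivalent (assumes n is at least a couple of cache lines):

#include <stdint.h>
#include <string.h>

static void zero_huge_sketch (unsigned char *p, size_t n)
{
  size_t head = (size_t) (-(uintptr_t) p & 127);  // bytes to the next line
  memset (p, 0, head);  p += head;  n -= head;
  for (; n >= 128; p += 128, n -= 128)
    memset (p, 0, 128);   // one dcbz per 128-byte line in the real loop
  memset (p, 0, n);       // 0..127 tail bytes
}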
*/ + .align 4 +L(small): + mtocrf 0x01,5 + bne cr6,L(copy_tail) + + stw 4,0(10) + stw 4,4(10) + blr + +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. */ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END (__bzero) +#ifndef __bzero +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/multiarch/Implies new file mode 100644 index 0000000000..bf5d6171a5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/rawmemchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/rawmemchr.S new file mode 100644 index 0000000000..48afb75943 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/rawmemchr.S @@ -0,0 +1,115 @@ +/* Optimized rawmemchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] rawmemchr (void *s [r3], int c [r4]) */ + +#ifndef RAWMEMCHR +# define RAWMEMCHR __rawmemchr +#endif + .machine power7 +ENTRY (RAWMEMCHR) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* Now r4 has a doubleword of c bytes. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r5,r12,r4 /* Compare each byte against c byte. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 /* Move left to discard ignored bits. */ + srd r5,r5,r6 /* Bring the bits back as zeros. */ +#endif + cmpdi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r4 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. 
*/ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r4 + cmpb r6,r11,r4 + or r7,r5,r6 + cmpdi cr7,r7,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a 'c' byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The 'c' byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + mr r5,r6 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the 'c' byte in the original + doubleword from the string. Use that fact to find out what is + the position of the byte inside the string. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 /* Count trailing zeros. */ +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching char. */ + blr +END (RAWMEMCHR) +weak_alias (__rawmemchr,rawmemchr) +libc_hidden_builtin_def (__rawmemchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/stpncpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/stpncpy.S new file mode 100644 index 0000000000..a346dd7e28 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/stpncpy.S @@ -0,0 +1,24 @@ +/* Optimized stpncpy implementation for PowerPC64/POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STPNCPY +#include <sysdeps/powerpc/powerpc64/power7/strncpy.S> + +weak_alias (__stpncpy, stpncpy) +libc_hidden_def (__stpncpy) +libc_hidden_builtin_def (stpncpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp.S new file mode 100644 index 0000000000..e856b8a593 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp.S @@ -0,0 +1,126 @@ +/* Optimized strcasecmp implementation for PowerPC64. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <locale-defines.h> + +/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) + + or if defined USE_IN_EXTENDED_LOCALE_MODEL: + + int [r3] strcasecmp_l (const char *s1 [r3], const char *s2 [r4], + __locale_t loc [r5]) */ + +#ifndef STRCMP +# define __STRCMP __strcasecmp +# define STRCMP strcasecmp +#endif + +ENTRY (__STRCMP) +#ifndef USE_IN_EXTENDED_LOCALE_MODEL + CALL_MCOUNT 2 +#else + CALL_MCOUNT 3 +#endif + +#define rRTN r3 /* Return value */ +#define rSTR1 r5 /* 1st string */ +#define rSTR2 r4 /* 2nd string */ +#define rLOCARG r5 /* 3rd argument: locale_t */ +#define rCHAR1 r6 /* Byte read from 1st string */ +#define rCHAR2 r7 /* Byte read from 2nd string */ +#define rADDR1 r8 /* Address of tolower(rCHAR1) */ +#define rADDR2 r12 /* Address of tolower(rCHAR2) */ +#define rLWR1 r8 /* Word tolower(rCHAR1) */ +#define rLWR2 r12 /* Word tolower(rCHAR2) */ +#define rTMP r9 +#define rLOC r11 /* Default locale address */ + + cmpd cr7, r3, r4 +#ifndef USE_IN_EXTENDED_LOCALE_MODEL + ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) + add rLOC, rTMP, __libc_tsd_LOCALE@tls + ld rLOC, 0(rLOC) +#else + mr rLOC, rLOCARG +#endif + ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) + mr rSTR1, rRTN + li rRTN, 0 + beqlr cr7 + + + /* Unrolling loop for POWER: loads are done with 'lbz' plus + offset and string descriptors are only updated in the end + of loop unrolling. */ + + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ +L(loop): + cmpdi rCHAR1, 0 /* *s1 == '\0' ? */ + sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ + sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ + lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ + lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ + cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? 
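Each unrolled step in this loop is: load one byte from each string, scale it by 4 (the sldi by 2) to index the locale's 32-bit tolower table, and compare the two translated values, stopping on NUL or on a mismatch. C shape of the loop, with tbl as a simplified stand-in for the table fetched from LOCALE_CTYPE_TOLOWER (the indexing details of the real ctype table are omitted):

#include <stdint.h>

static int caseloop_sketch (const unsigned char *s1, const unsigned char *s2,
                            const int32_t *tbl)
{
  for (;;)
    {
      int32_t c1 = tbl[*s1], c2 = tbl[*s2];
      if (*s1 == '\0' || c1 != c2)
        return (int) (c1 - c2);   // like "subf r0, rLWR2, rLWR1; extsw"
      s1++, s2++;
    }
}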
*/ + crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 != '\0') || (r == 1) */ + beq cr1, L(done) + lbz rCHAR1, 1(rSTR1) + lbz rCHAR2, 1(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 2(rSTR1) + lbz rCHAR2, 2(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 3(rSTR1) + lbz rCHAR2, 3(rSTR2) + cmpdi rCHAR1, 0 + /* Increment both string descriptors */ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1,L(done) + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ + b L(loop) +L(done): + subf r0, rLWR2, rLWR1 + extsw rRTN, r0 + blr +END (__STRCMP) + +weak_alias (__STRCMP, STRCMP) +libc_hidden_builtin_def (__STRCMP) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S new file mode 100644 index 0000000000..c13c4ebcb8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S @@ -0,0 +1,5 @@ +#define USE_IN_EXTENDED_LOCALE_MODEL +#define STRCMP strcasecmp_l +#define __STRCMP __strcasecmp_l + +#include "strcasecmp.S" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchr.S new file mode 100644 index 0000000000..a18e2e101c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchr.S @@ -0,0 +1,230 @@ +/* Optimized strchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRCHR +# define STRCHR strchr +#endif + +/* int [r3] strchr (char *s [r3], int c [r4]) */ + .machine power7 +ENTRY (STRCHR) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. 
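strchr must stop at whichever comes first, the wanted byte or the terminating NUL, so every doubleword is run through cmpb twice (against the c splat and against zero) and the two masks are ORed for one loop-exit test. The word-parallel code is the fast form of:

char *strchr_sketch (const char *s, int c)
{
  for (;; s++)
    {
      if (*s == (char) c)
        return (char *) s;   // also covers the c == '\0' case
      if (*s == '\0')
        return 0;            // hit the NUL first: not found
    }
}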
*/ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r9,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r9,r4 + cmpb r7,r9,r0 + or r12,r10,r11 + or r9,r6,r7 + or r5,r12,r9 + cmpdi cr7,r5,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + + cmpdi cr6,r12,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 + + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r3,r10,-1 + andc r3,r3,r10 + popcntd r0,r3 + addi r4,r11,-1 + andc r4,r4,r11 + cmpld cr7,r3,r4 + bgt cr7,L(no_match) +#else + cntlzd r0,r10 /* Count leading zeros before c matches. */ + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + blr + + .align 4 +L(no_match): + li r3,0 + blr + +/* We are here because strchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. 
This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(loop_null) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +END (STRCHR) +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchrnul.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchrnul.S new file mode 100644 index 0000000000..27bc1f0682 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchrnul.S @@ -0,0 +1,131 @@ +/* Optimized strchrnul implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRCHRNUL +# define STRCHRNUL __strchrnul +#endif +/* int [r3] strchrnul (char *s [r3], int c [r4]) */ + .machine power7 +ENTRY (STRCHRNUL) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r0 /* Compare each byte against c byte. */ + cmpb r9,r12,r4 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and to bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r9,r9,r6 + sld r10,r10,r6 + sld r9,r9,r6 +#else + sld r10,r10,r6 + sld r9,r9,r6 + srd r10,r10,r6 + srd r9,r9,r6 +#endif + or r5,r9,r10 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. 
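Unlike strchr, strchrnul never returns NULL: if c is absent, the result is a pointer to the terminating NUL itself. That is why one merged mask is enough here and L(done) needs no "which matched first?" fixup. Reference form:

char *strchrnul_sketch (const char *s, int c)
{
  while (*s != (char) c && *s != '\0')
    s++;
  return (char *) s;   // points at c, or at the NUL if c never appears
}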
*/ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r0 + cmpb r9,r12,r4 + or r5,r9,r10 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r10,r12,r0 + cmpb r9,r12,r4 + cmpb r6,r11,r0 + cmpb r7,r11,r4 + or r5,r9,r10 + or r10,r6,r7 + or r11,r5,r10 + cmpdi cr7,r11,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r5 so we can calculate + the pointer. */ + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of matching c/null byte. */ + blr +END (STRCHRNUL) +weak_alias (STRCHRNUL, strchrnul) +libc_hidden_builtin_def (STRCHRNUL) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcmp.S new file mode 100644 index 0000000000..14e14f457e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcmp.S @@ -0,0 +1,168 @@ +/* Optimized strcmp implementation for Power7 using 'cmpb' instruction + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The optimization is achieved here through cmpb instruction. + 8byte aligned strings are processed with double word comparision + and unaligned strings are handled effectively with loop unrolling + technique */ + +#include <sysdep.h> + +#ifndef STRCMP +# define STRCMP strcmp +#endif + +/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */ + + .machine power7 +EALIGN (STRCMP, 4, 0) + CALL_MCOUNT 2 + + or r9, r3, r4 + rldicl. r10, r9, 0, 61 /* are s1 and s2 8 byte aligned..? 
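The entry test just above ("or r9, r3, r4; rldicl. r10, r9, 0, 61") ORs the two pointers and extracts the low three bits: both strings are 8-byte aligned exactly when the OR of the addresses is. In C:

#include <stdint.h>

static int both_aligned8 (const char *s1, const char *s2)
{
  return (((uintptr_t) s1 | (uintptr_t) s2) & 7) == 0;  // rldicl. result == 0
}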
*/ + bne cr0, L(process_unaligned_bytes) + li r5, 0 + + .align 4 +/* process input parameters on double word aligned boundary */ +L(unrollDword): + ld r8,0(r3) + ld r10,0(r4) + cmpb r7,r8,r5 + cmpdi cr7,r7,0 + mr r9,r7 + bne cr7,L(null_found) + cmpld cr7,r8,r10 + bne cr7,L(different) + + ld r8,8(r3) + ld r10,8(r4) + cmpb r7,r8,r5 + cmpdi cr7,r7,0 + mr r9,r7 + bne cr7,L(null_found) + cmpld cr7,r8,r10 + bne cr7,L(different) + + ld r8,16(r3) + ld r10,16(r4) + cmpb r7,r8,r5 + cmpdi cr7,r7,0 + mr r9,r7 + bne cr7,L(null_found) + cmpld cr7,r8,r10 + bne cr7,L(different) + + ld r8,24(r3) + ld r10,24(r4) + cmpb r7,r8,r5 + cmpdi cr7,r7,0 + mr r9,r7 + bne cr7,L(null_found) + cmpld cr7,r8,r10 + bne cr7,L(different) + + addi r3, r3, 32 + addi r4, r4, 32 + beq cr7, L(unrollDword) + + .align 4 +L(null_found): +#ifdef __LITTLE_ENDIAN__ + neg r7,r9 + and r9,r9,r7 + li r7,-1 + cntlzd r9,r9 + subfic r9,r9,71 + sld r9,r7,r9 +#else + cntlzd r9,r9 + li r7,-1 + addi r9,r9,8 + srd r9,r7,r9 +#endif + or r8,r8,r9 + or r10,r10,r9 + +L(different): + cmpb r9,r8,r10 +#ifdef __LITTLE_ENDIAN__ + addi r7,r9,1 + andc r9,r7,r9 + cntlzd r9,r9 + subfic r9,r9,63 +#else + not r9,r9 + cntlzd r9,r9 + subfic r9,r9,56 +#endif + srd r3,r8,r9 + srd r10,r10,r9 + rldicl r10,r10,0,56 + rldicl r3,r3,0,56 + subf r3,r10,r3 + blr + + .align 4 +L(process_unaligned_bytes): + lbz r9, 0(r3) /* load byte from s1 */ + lbz r10, 0(r4) /* load byte from s2 */ + cmpdi cr7, r9, 0 /* compare *s1 with NULL */ + beq cr7, L(diffOfNULL) /* if *s1 is NULL , return *s1 - *s2 */ + cmplw cr7, r9, r10 /* compare *s1 and *s2 */ + bne cr7, L(ComputeDiff) /* branch to compute difference and return */ + + lbz r9, 1(r3) /* load next byte from s1 */ + lbz r10, 1(r4) /* load next byte from s2 */ + cmpdi cr7, r9, 0 /* compare *s1 with NULL */ + beq cr7, L(diffOfNULL) /* if *s1 is NULL , return *s1 - *s2 */ + cmplw cr7, r9, r10 /* compare *s1 and *s2 */ + bne cr7, L(ComputeDiff) /* branch to compute difference and return */ + + lbz r9, 2(r3) /* unroll 3rd byte here */ + lbz r10, 2(r4) + cmpdi cr7, r9, 0 + beq cr7, L(diffOfNULL) + cmplw cr7, r9, r10 + bne 7, L(ComputeDiff) + + lbz r9, 3(r3) /* unroll 4th byte now */ + lbz r10, 3(r4) + addi r3, r3, 4 /* increment s1 by unroll factor */ + cmpdi cr7, r9, 0 + cmplw cr6, 9, r10 + beq cr7, L(diffOfNULL) + addi r4, r4, 4 /* increment s2 by unroll factor */ + beq cr6, L(process_unaligned_bytes) /* unroll byte processing */ + + .align 4 +L(ComputeDiff): + extsw r9, r9 + subf r10, r10, r9 /* compute s1 - s2 */ + extsw r3, r10 + blr /* return */ + + .align 4 +L(diffOfNULL): + li r9, 0 + subf r10, r10, r9 /* compute s1 - s2 */ + extsw r3, r10 /* sign extend result */ + blr /* return */ + +END (STRCMP) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strlen.S new file mode 100644 index 0000000000..63848c460c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strlen.S @@ -0,0 +1,107 @@ +/* Optimized strlen implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
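A note on the strcmp loop that ends above: each L(unrollDword) step loads one doubleword per string, uses cmpb against zero to detect a terminator, and an unsigned doubleword compare to detect a mismatch. A hedged C restatement of that step (strcmp_dword_model is local to this note; endianness and the shift/mask fixups are replaced by a byte scan for clarity):

    #include <stdint.h>
    #include <string.h>

    /* Assumes, as the aligned entry path guarantees, that both strings
       can be read in whole doublewords without faulting.  */
    static int
    strcmp_dword_model (const char *s1, const char *s2)
    {
      for (;;)
        {
          uint64_t w1, w2;
          memcpy (&w1, s1, 8);                       /* ld from s1 */
          memcpy (&w2, s2, 8);                       /* ld from s2 */
          if (w1 == w2
              && ((w1 - 0x0101010101010101ULL)       /* the cmpb test: */
                  & ~w1 & 0x8080808080808080ULL) == 0)  /* no 0 byte   */
            {
              s1 += 8;                 /* no null, no difference */
              s2 += 8;
              continue;
            }
          /* L(null_found)/L(different): resolve within the word.  */
          for (int i = 0; i < 8; i++)
            {
              unsigned char c1 = s1[i], c2 = s2[i];
              if (c1 != c2 || c1 == '\0')
                return c1 - c2;
            }
        }
    }

The assembly resolves the final word with shifts and masks instead of a byte loop; that is what the L(null_found) path is doing when it ORs a mask into both words so the bytes past the terminator compare equal.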
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strlen (char *s [r3]) */ + +#ifndef STRLEN +# define STRLEN strlen +#endif + .machine power7 +ENTRY (STRLEN) + CALL_MCOUNT 1 + dcbt 0,r3 + clrrdi r4,r3,3 /* Align the address to doubleword boundary. */ + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + ld r12,0(r4) /* Load doubleword from memory. */ +#ifdef __LITTLE_ENDIAN__ + sld r5,r5,r6 +#else + srd r5,r5,r6 /* MASK = MASK >> padding. */ +#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r4 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r4) + cmpb r10,r12,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + + ld r12, 8(r4) + ldu r11, 16(r4) + cmpb r10,r12,r0 + cmpb r9,r11,r0 + or r8,r9,r10 /* Merge everything in one doubleword. */ + cmpdi cr7,r8,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r10,0 + addi r4,r4,-8 + bne cr6,L(done) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r9 + addi r4,r4,8 + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the length. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r9, r10, -1 /* Form a mask from trailing zeros. */ + andc r9, r9, r10 + popcntd r0, r9 /* Count the bits in the mask. */ +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r4 + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r5,r0 /* Compute final length. */ + blr +END (STRLEN) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncmp.S new file mode 100644 index 0000000000..d53b31be8e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncmp.S @@ -0,0 +1,227 @@ +/* Optimized strcmp implementation for POWER7/PowerPC64. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
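The L(done) sequence in strlen above, and its twins in the neighboring routines, all convert a cmpb result mask into a byte offset the same way. A C equivalent using GCC builtins (first_match_offset is a name local to this sketch; the mask must be nonzero, as it is on every path that reaches L(done), since __builtin_clzll(0) is undefined):

    #include <stdint.h>

    static unsigned int
    first_match_offset (uint64_t mask)
    {
    #ifdef __LITTLE_ENDIAN__
      /* (mask - 1) & ~mask keeps exactly the bits below the lowest set
         bit, so popcntd of it counts the trailing zero bits.  */
      return __builtin_popcountll ((mask - 1) & ~mask) >> 3;
    #else
      /* Big-endian: the first string byte is the most significant
         byte of the register, so count leading zeros (cntlzd).  */
      return __builtin_clzll (mask) >> 3;
    #endif
    }

Dividing the bit count by 8 (the srdi r0,r0,3) turns it into the index of the first 0xff byte, i.e. the offset of the first matching character within the doubleword.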
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +EALIGN (STRNCMP,5,0) + CALL_MCOUNT 3 + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r10 +#define rWORD4 r11 +#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + nop + or rTMP,rSTR2,rSTR1 + lis r7F7F,0x7f7f + dcbt 0,rSTR2 + nop + clrldi. rTMP,rTMP,61 + cmpldi cr1,rN,0 + lis rFEFE,-0x101 + bne L(unaligned) +/* We are doubleword aligned so set up for two loops. first a double word + loop, then fall into the byte loop if any residual. */ + srdi. rTMP,rN,3 + clrldi rN,rN,61 + addi rFEFE,rFEFE,-0x101 + addi r7F7F,r7F7F,0x7f7f + cmpldi cr1,rN,0 + beq L(unaligned) + + mtctr rTMP + ld rWORD1,0(rSTR1) + ld rWORD2,0(rSTR2) + sldi rTMP,rFEFE,32 + insrdi r7F7F,r7F7F,32,0 + add rFEFE,rFEFE,rTMP + b L(g1) + +L(g0): + ldu rWORD1,8(rSTR1) + bne cr1,L(different) + ldu rWORD2,8(rSTR2) +L(g1): add rTMP,rFEFE,rWORD1 + nor rNEG,r7F7F,rWORD1 + bdz L(tail) + and. rTMP,rTMP,rNEG + cmpd cr1,rWORD1,rWORD2 + beq L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + beq cr1, L(equal) + andc rTMP2, rTMP2, rTMP + rldimi rTMP2, rTMP2, 1, 0 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + cmpd cr1, rWORD1, rWORD2 + beq cr1, L(equal) + cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */ + addi rNEG, rBITDIF, 1 + orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */ + sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */ + andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */ + andc rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt L(highbit) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */ + addi rNEG, rBITDIF, 1 + orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */ + sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */ + andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */ + andc rWORD2, rWORD2, rNEG + xor. 
rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr + +#else +L(endstring): + and rTMP,r7F7F,rWORD1 + beq cr1,L(equal) + add rTMP,rTMP,r7F7F + xor. rBITDIF,rWORD1,rWORD2 + andc rNEG,rNEG,rTMP + blt L(highbit) + cntlzd rBITDIF,rBITDIF + cntlzd rNEG,rNEG + addi rNEG,rNEG,7 + cmpd cr1,rNEG,rBITDIF + sub rRTN,rWORD1,rWORD2 + blt cr1,L(equal) + sradi rRTN,rRTN,63 /* must return an int. */ + ori rRTN,rRTN,1 + blr +L(equal): + li rRTN,0 + blr + +L(different): + ld rWORD1,-8(rSTR1) + xor. rBITDIF,rWORD1,rWORD2 + sub rRTN,rWORD1,rWORD2 + blt L(highbit) + sradi rRTN,rRTN,63 + ori rRTN,rRTN,1 + blr +L(highbit): + sradi rRTN,rWORD2,63 + ori rRTN,rRTN,1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. rTMP,rTMP,rNEG + cmpd cr1,rWORD1,rWORD2 + bne L(endstring) + addi rSTR1,rSTR1,8 + bne cr1,L(different) + addi rSTR2,rSTR2,8 + cmpldi cr1,rN,0 +L(unaligned): + mtctr rN + ble cr1,L(ux) +L(uz): + lbz rWORD1,0(rSTR1) + lbz rWORD2,0(rSTR2) + .align 4 +L(u1): + cmpdi cr1,rWORD1,0 + bdz L(u4) + cmpd rWORD1,rWORD2 + beq cr1,L(u4) + bne L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + cmpdi cr1,rWORD3,0 + bdz L(u3) + cmpd rWORD3,rWORD4 + beq cr1,L(u3) + bne L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + cmpdi cr1,rWORD1,0 + bdz L(u4) + cmpd rWORD1,rWORD2 + beq cr1,L(u4) + bne L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + cmpdi cr1,rWORD3,0 + bdz L(u3) + cmpd rWORD3,rWORD4 + beq cr1,L(u3) + bne L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + b L(u1) + +L(u3): sub rRTN,rWORD3,rWORD4 + blr +L(u4): sub rRTN,rWORD1,rWORD2 + blr +L(ux): + li rRTN,0 + blr +END (STRNCMP) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncpy.S new file mode 100644 index 0000000000..0224f74898 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncpy.S @@ -0,0 +1,722 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Implements the functions + + char * [r3] strncpy (char *dst [r3], const char *src [r4], size_t n [r5]) + + AND + + char * [r3] stpncpy (char *dst [r3], const char *src [r4], size_t n [r5]) + + The algorithm is as follows: + > if src and dest are 8 byte aligned, perform double word copy + else + > copy byte by byte on unaligned addresses. + + The aligned comparison are made using cmpb instructions. */ + +/* The focus on optimization for performance improvements are as follows: + 1. data alignment [gain from aligned memory access on read/write] + 2. POWER7 gains performance with loop unrolling/unwinding + [gain by reduction of branch penalty]. + 3. 
The final pad with null bytes is done by calling an optimized + memset. */ + +#ifdef USE_AS_STPNCPY +# ifndef STPNCPY +# define FUNC_NAME __stpncpy +# else +# define FUNC_NAME STPNCPY +# endif +#else +# ifndef STRNCPY +# define FUNC_NAME strncpy +# else +# define FUNC_NAME STRNCPY +# endif +#endif /* !USE_AS_STPNCPY */ + +#define FRAMESIZE (FRAME_MIN_SIZE+32) + +#ifndef MEMSET +/* For builds with no IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define MEMSET __GI_memset +# else +# define MEMSET memset +# endif +#endif + + .machine power7 +EALIGN(FUNC_NAME, 4, 0) + CALL_MCOUNT 3 + + mflr r0 /* load link register LR to r0 */ + or r10, r3, r4 /* to verify source and destination */ + rldicl. r8, r10, 0, 61 /* is double word aligned .. ? */ + + std r19, -8(r1) /* save callers register , r19 */ + std r18, -16(r1) /* save callers register , r18 */ + std r0, 16(r1) /* store the link register */ + stdu r1, -FRAMESIZE(r1) /* create the stack frame */ + + mr r9, r3 /* save r3 into r9 for use */ + mr r18, r3 /* save r3 for retCode of strncpy */ + bne 0, L(unaligned) + +L(aligned): + srdi r11, r5, 3 /* compute count for CTR ; count = n/8 */ + cmpldi cr7, r11, 3 /* if count > 4 ; perform unrolling 4 times */ + ble 7, L(update1) + + ld r10, 0(r4) /* load doubleWord from src */ + cmpb r8, r10, r8 /* compare src with NULL ,we read just now */ + cmpdi cr7, r8, 0 /* if cmpb returned NULL ; we continue */ + bne cr7, L(update3) + + std r10, 0(r3) /* copy doubleword at offset=0 */ + ld r10, 8(r4) /* load next doubleword from offset=8 */ + cmpb r8, r10, r8 /* compare src with NULL , we read just now */ + cmpdi cr7, r8, 0 /* if cmpb returned NULL ; we continue */ + bne 7,L(HopBy8) + + addi r8, r11, -4 + mr r7, r3 + srdi r8, r8, 2 + mr r6, r4 + addi r8, r8, 1 + li r12, 0 + mtctr r8 + b L(dwordCopy) + + .p2align 4 +L(dWordUnroll): + std r8, 16(r9) + ld r8, 24(r4) /* load dword,perform loop unrolling again */ + cmpb r10, r8, r10 + cmpdi cr7, r10, 0 + bne cr7, L(HopBy24) + + std r8, 24(r7) /* copy dword at offset=24 */ + addi r9, r9, 32 + addi r4, r4, 32 + bdz L(leftDwords) /* continue with loop on counter */ + + ld r3, 32(r6) + cmpb r8, r3, r10 + cmpdi cr7, r8, 0 + bne cr7, L(update2) + + std r3, 32(r7) + ld r10, 40(r6) + cmpb r8, r10, r8 + cmpdi cr7, r8, 0 + bne cr7, L(HopBy40) + + mr r6, r4 /* update values */ + mr r7, r9 + mr r11, r0 + mr r5, r19 + +L(dwordCopy): + std r10, 8(r9) /* copy dword at offset=8 */ + addi r19, r5, -32 + addi r0, r11, -4 + ld r8, 16(r4) + cmpb r10, r8, r12 + cmpdi cr7, r10, 0 + beq cr7, L(dWordUnroll) + + addi r9, r9, 16 /* increment dst by 16 */ + addi r4, r4, 16 /* increment src by 16 */ + addi r5, r5, -16 /* decrement length 'n' by 16 */ + addi r0, r11, -2 /* decrement loop counter */ + +L(dWordUnrollOFF): + ld r10, 0(r4) /* load first dword */ + li r8, 0 /* load mask */ + cmpb r8, r10, r8 + cmpdi cr7, r8, 0 + bne cr7, L(byte_by_byte) + mtctr r0 + li r7, 0 + b L(CopyDword) + + .p2align 4 +L(loadDWordandCompare): + ld r10, 0(r4) + cmpb r8, r10, r7 + cmpdi cr7, r8, 0 + bne cr7, L(byte_by_byte) + +L(CopyDword): + addi r9, r9, 8 + std r10, -8(r9) + addi r4, r4, 8 + addi r5, r5, -8 + bdnz L(loadDWordandCompare) + +L(byte_by_byte): + cmpldi cr7, r5, 3 + ble cr7, L(verifyByte) + srdi r10, r5, 2 + mr r19, r9 + mtctr r10 + b L(firstByteUnroll) + + .p2align 4 +L(bytes_unroll): + lbz r10, 1(r4) /* load byte from src */ + cmpdi cr7, r10, 0 /* compare for NULL */ + stb r10, 1(r19) /* store byte to dst */ + beq cr7, 
L(updtDestComputeN2ndByte) + + addi r4, r4, 4 /* advance src */ + + lbz r10, -2(r4) /* perform loop unrolling for byte r/w */ + cmpdi cr7, r10, 0 + stb r10, 2(r19) + beq cr7, L(updtDestComputeN3rdByte) + + lbz r10, -1(r4) /* perform loop unrolling for byte r/w */ + addi r19, r19, 4 + cmpdi cr7, r10, 0 + stb r10, -1(r19) + beq cr7, L(ComputeNByte) + + bdz L(update0) + +L(firstByteUnroll): + lbz r10, 0(r4) /* perform loop unrolling for byte r/w */ + cmpdi cr7, 10, 0 + stb r10, 0(r19) + bne cr7, L(bytes_unroll) + addi r19, r19, 1 + +L(ComputeNByte): + subf r9, r19, r9 /* compute 'n'n bytes to fill */ + add r8, r9, r5 + +L(zeroFill): + cmpdi cr7, r8, 0 /* compare if length is zero */ + beq cr7, L(update3return) + + mr r3, r19 /* fill buffer with */ + li r4, 0 /* zero fill buffer */ + mr r5, r8 /* how many bytes to fill buffer with */ + bl MEMSET /* call optimized memset */ + nop + +L(update3return): +#ifdef USE_AS_STPNCPY + addi r3, r19, -1 /* update return value */ +#endif + +L(hop2return): +#ifndef USE_AS_STPNCPY + mr r3, r18 /* set return value */ +#endif + addi r1, r1, FRAMESIZE /* restore stack pointer */ + ld r0, 16(r1) /* read the saved link register */ + ld r18, -16(r1) /* restore callers save register, r18 */ + ld r19, -8(r1) /* restore callers save register, r19 */ + mtlr r0 /* branch to link register */ + blr /* return */ + + .p2align 4 +L(update0): + mr r9, r19 + + .p2align 4 +L(verifyByte): + rldicl. r8, r5, 0, 62 +#ifdef USE_AS_STPNCPY + mr r3, r9 +#endif + beq cr0, L(hop2return) + mtctr r8 + addi r4, r4, -1 + mr r19, r9 + b L(oneBYone) + + .p2align 4 +L(proceed): + bdz L(done) + +L(oneBYone): + lbzu r10, 1(r4) /* copy byte */ + addi r19, r19, 1 + addi r8, r8, -1 + cmpdi cr7, r10, 0 + stb r10, -1(r19) + bne cr7, L(proceed) + b L(zeroFill) + + .p2align 4 +L(done): + addi r1, r1, FRAMESIZE /* restore stack pointer */ +#ifdef USE_AS_STPNCPY + mr r3, r19 /* set the return value */ +#else + mr r3, r18 /* set the return value */ +#endif + ld r0, 16(r1) /* read the saved link register */ + ld r18, -16(r1) /* restore callers save register, r18 */ + ld r19, -8(r1) /* restore callers save register, r19 */ + mtlr r0 /* branch to link register */ + blr /* return */ + +L(update1): + mr r0, r11 + mr r19, r5 + + .p2align 4 +L(leftDwords): + cmpdi cr7, r0, 0 + mr r5, r19 + bne cr7, L(dWordUnrollOFF) + b L(byte_by_byte) + + .p2align 4 +L(updtDestComputeN2ndByte): + addi r19, r19, 2 /* update dst by 2 */ + subf r9, r19, r9 /* compute distance covered */ + add r8, r9, r5 + b L(zeroFill) + + .p2align 4 +L(updtDestComputeN3rdByte): + addi r19, r19, 3 /* update dst by 3 */ + subf r9, r19, r9 /* compute distance covered */ + add r8, r9, r5 + b L(zeroFill) + + .p2align 4 +L(HopBy24): + addi r9, r9, 24 /* increment dst by 24 */ + addi r4, r4, 24 /* increment src by 24 */ + addi r5, r5, -24 /* decrement length 'n' by 24 */ + addi r0, r11, -3 /* decrement loop counter */ + b L(dWordUnrollOFF) + + .p2align 4 +L(update2): + mr r5, r19 + b L(dWordUnrollOFF) + + .p2align 4 +L(HopBy40): + addi r9, r7, 40 /* increment dst by 40 */ + addi r4, r6, 40 /* increment src by 40 */ + addi r5, r5, -40 /* decrement length 'n' by 40 */ + addi r0, r11, -5 /* decrement loop counter */ + b L(dWordUnrollOFF) + +L(update3): + mr r0, r11 + b L(dWordUnrollOFF) + +L(HopBy8): + addi r9, r3, 8 /* increment dst by 8 */ + addi r4, r4, 8 /* increment src by 8 */ + addi r5, r5, -8 /* decrement length 'n' by 8 */ + addi r0, r11, -1 /* decrement loop counter */ + b L(dWordUnrollOFF) + +L(unaligned): + cmpdi r5, 16 /* Proceed byte by byte 
for less than 16 */ + ble L(byte_by_byte) + rldicl r7, r3, 0, 61 + rldicl r6, r4, 0, 61 + cmpdi r6, 0 /* Check src alignment */ + beq L(srcaligndstunalign) + /* src is unaligned */ + rlwinm r10, r4, 3,26,28 /* Calculate padding. */ + clrrdi r4, r4, 3 /* Align the addr to dw boundary */ + ld r8, 0(r4) /* Load doubleword from memory. */ + li r0, 0 + /* Discard bits not part of the string */ +#ifdef __LITTLE_ENDIAN__ + srd r7, r8, r10 +#else + sld r7, r8, r10 +#endif + cmpb r0, r7, r0 /* Compare each byte against null */ + /* Discard bits not part of the string */ +#ifdef __LITTLE_ENDIAN__ + sld r0, r0, r10 +#else + srd r0, r0, r10 +#endif + cmpdi r0, 0 + bne L(bytebybyte) /* if it has null, copy byte by byte */ + subfic r6, r6, 8 + rlwinm r12, r3, 3,26,28 /* Calculate padding in bits. */ + rldicl r9, r3, 0, 61 /* Calculate padding in bytes. */ + addi r3, r3, -1 + + cmpdi r12, 0 /* check dest alignment */ + beq L(srcunaligndstalign) + + /* both src and dst unaligned */ +#ifdef __LITTLE_ENDIAN__ + sld r8, r7, r10 + mr r11, r10 + addi r11, r11, -8 /* Adjust byte pointer on loaded dw */ +#else + srd r8, r7, r10 + subfic r11, r10, 64 +#endif + /* dst alignment is greater then src alignment? */ + cmpd cr7, r12, r10 + ble cr7, L(dst_align_small) + /* src alignment is less than dst */ + + /* Calculate the dst alignment difference */ + subfic r7, r9, 8 + mtctr r7 + + /* Write until dst is aligned */ + cmpdi r0, r7, 4 + blt L(storebyte1) /* less than 4, store byte by byte */ + beq L(equal1) /* if its 4, store word */ + addi r0, r7, -4 /* greater than 4, so stb and stw */ + mtctr r0 +L(storebyte1): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 /* Adjust byte pointer on loaded dw */ +#else + addi r11, r11, -8 +#endif + srd r7, r8, r11 + stbu r7, 1(r3) + addi r5, r5, -1 + bdnz L(storebyte1) + + subfic r7, r9, 8 /* Check the remaining bytes */ + cmpdi r0, r7, 4 + blt L(proceed1) + + .align 4 +L(equal1): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 /* Adjust byte pointer on loaded dw */ + srd r7, r8, r11 +#else + subfic r11, r11, 64 + sld r7, r8, r11 + srdi r7, r7, 32 +#endif + stw r7, 1(r3) + addi r3, r3, 4 + addi r5, r5, -4 + +L(proceed1): + mr r7, r8 + /* calculate the Left over bytes to be written */ + subfic r11, r10, 64 + subfic r12, r12, 64 + subf r12, r12, r11 /* remaining bytes on second dw */ + subfic r10, r12, 64 /* remaining bytes on first dw */ + subfic r9, r9, 8 + subf r6, r9, r6 /* recalculate padding */ +L(srcunaligndstalign): + addi r3, r3, 1 + subfic r12, r10, 64 /* remaining bytes on second dw */ + addi r4, r4, 8 + li r0,0 + b L(storedouble) + + .align 4 +L(dst_align_small): + mtctr r6 + /* Write until src is aligned */ +L(storebyte2): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 /* Adjust byte pointer on dw */ +#else + addi r11, r11, -8 +#endif + srd r7, r8, r11 + stbu r7, 1(r3) + addi r5, r5, -1 + bdnz L(storebyte2) + + addi r4, r4, 8 /* Increment src pointer */ + addi r3, r3, 1 /* Increment dst pointer */ + mr r9, r3 + li r8, 0 + cmpd cr7, r12, r10 + beq cr7, L(aligned) + rldicl r6, r3, 0, 61 /* Recalculate padding */ + mr r7, r6 + + /* src is algined */ +L(srcaligndstunalign): + mr r9, r3 + mr r6, r7 + ld r8, 0(r4) + subfic r10, r7, 8 + mr r7, r8 + li r0, 0 /* Check null */ + cmpb r0, r8, r0 + cmpdi r0, 0 + bne L(byte_by_byte) /* Do byte by byte if there is NULL */ + rlwinm r12, r3, 3,26,28 /* Calculate padding */ + addi r3, r3, -1 + /* write byte by byte until aligned */ +#ifdef __LITTLE_ENDIAN__ + li r11, -8 +#else + li r11, 64 +#endif + mtctr r10 + cmpdi r0, r10, 4 + blt 
L(storebyte) + beq L(equal) + addi r0, r10, -4 + mtctr r0 +L(storebyte): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 /* Adjust byte pointer on dw */ +#else + addi r11, r11, -8 +#endif + srd r7, r8, r11 + stbu r7, 1(r3) + addi r5, r5, -1 + bdnz L(storebyte) + + cmpdi r0, r10, 4 + blt L(align) + + .align 4 +L(equal): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 + srd r7, r8, r11 +#else + subfic r11, r11, 64 + sld r7, r8, r11 + srdi r7, r7, 32 +#endif + stw r7, 1(r3) + addi r5, r5, -4 + addi r3, r3, 4 +L(align): + addi r3, r3, 1 + addi r4, r4, 8 /* Increment src pointer */ + subfic r10, r12, 64 + li r0, 0 + /* dst addr aligned to 8 */ +L(storedouble): + cmpdi r5, 8 + ble L(null1) + ld r7, 0(r4) /* load next dw */ + cmpb r0, r7, r0 + cmpdi r0, 0 /* check for null on each new dw */ + bne L(null) +#ifdef __LITTLE_ENDIAN__ + srd r9, r8, r10 /* bytes from first dw */ + sld r11, r7, r12 /* bytes from second dw */ +#else + sld r9, r8, r10 + srd r11, r7, r12 +#endif + or r11, r9, r11 /* make as a single dw */ + std r11, 0(r3) /* store as std on aligned addr */ + mr r8, r7 /* still few bytes left to be written */ + addi r3, r3, 8 /* increment dst addr */ + addi r4, r4, 8 /* increment src addr */ + addi r5, r5, -8 + b L(storedouble) /* Loop until NULL */ + + .align 4 + +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(null): + addi r3, r3, -1 + mr r10, r12 + mtctr r6 +#ifdef __LITTLE_ENDIAN__ + subfic r10, r10, 64 + addi r10, r10, -8 +#endif + cmpdi r0, r5, 4 + blt L(loop) + cmpdi r0, r6, 4 + blt L(loop) + + /* we can still use stw if leftover >= 4 */ +#ifdef __LITTLE_ENDIAN__ + addi r10, r10, 8 + srd r11, r8, r10 +#else + subfic r10, r10, 64 + sld r11, r8, r10 + srdi r11, r11, 32 +#endif + stw r11, 1(r3) + addi r5, r5, -4 + addi r3, r3, 4 + cmpdi r0, r5, 0 + beq L(g1) + cmpdi r0, r6, 4 + beq L(bytebybyte1) + addi r10, r10, 32 +#ifdef __LITTLE_ENDIAN__ + addi r10, r10, -8 +#else + subfic r10, r10, 64 +#endif + addi r0, r6, -4 + mtctr r0 + /* remaining byte by byte part of first dw */ +L(loop): +#ifdef __LITTLE_ENDIAN__ + addi r10, r10, 8 +#else + addi r10, r10, -8 +#endif + srd r0, r8, r10 + stbu r0, 1(r3) + addi r5, r5, -1 + cmpdi r0, r5, 0 + beq L(g1) + bdnz L(loop) +L(bytebybyte1): + addi r3, r3, 1 + /* remaining byte by byte part of second dw */ +L(bytebybyte): + addi r3, r3, -8 + addi r4, r4, -1 + +#ifdef __LITTLE_ENDIAN__ + extrdi. r0, r7, 8, 56 + stbu r7, 8(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 48 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 40 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 32 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 24 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 16 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 8 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi r0, r7, 8, 0 + stbu r0, 1(r3) + addi r5, r5, -1 + b L(g2) +#else + extrdi. r0, r7, 8, 0 + stbu r0, 8(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 8 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 16 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 24 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. 
r0, r7, 8, 32 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 40 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 48 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + stbu r7, 1(r3) + addi r5, r5, -1 + b L(g2) +#endif +L(g1): +#ifdef USE_AS_STPNCPY + addi r3, r3, 1 +#endif +L(g2): + addi r3, r3, 1 + mr r19, r3 + mr r8, r5 + b L(zeroFill) +L(null1): + mr r9, r3 + subf r4, r6, r4 + b L(byte_by_byte) +END(FUNC_NAME) +#ifndef USE_AS_STPNCPY +libc_hidden_builtin_def (strncpy) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strnlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strnlen.S new file mode 100644 index 0000000000..a970b6ce30 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strnlen.S @@ -0,0 +1,182 @@ +/* Optimized strnlen implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRNLEN +# define STRNLEN __strnlen +#endif + +/* int [r3] strnlen (char *s [r3], int size [r4]) */ + .machine power7 +ENTRY (STRNLEN) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 + add r7,r3,r4 /* Calculate the last acceptable address. */ + cmpldi r4,32 + li r0,0 /* Doubleword with null chars. */ + addi r7,r7,-1 + + /* If we have less than 33 bytes to search, skip to a faster code. */ + ble L(small_range) + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + sld r10,r10,r6 +#else + sld r10,r10,r6 + srd r10,r10,r6 +#endif + cmpldi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + clrrdi r7,r7,3 /* Address of last doubleword. */ + mtcrf 0x01,r8 + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_setup) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r0 + cmpldi cr7,r10,0 + bne cr7,L(done) + +L(loop_setup): + /* The last dword we want to read in the loop below is the one + containing the last byte of the string, ie. the dword at + (s + size - 1) & ~7, or r7. The first dword read is at + r8 + 8, we read 2 * cnt dwords, so the last dword read will + be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives + cnt = (r7 - r8) / 16 */ + sub r5,r7,r8 + srdi r6,r5,4 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. */ + + /* Main loop to look for the null byte in the string. Since + it's a small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. 
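Stepping back from the strncpy/stpncpy body that ends above: the design choice its header calls out is that, once the terminator is found, the remaining pad is handed to the optimized memset in a single call (the L(zeroFill) path) rather than zeroed inline. In hedged C (strncpy_model is this note's name, not glibc's):

    #include <string.h>

    /* Copy-then-pad shape of the routine; the assembly tracks the
       copied length in registers rather than calling strnlen.  */
    static char *
    strncpy_model (char *dst, const char *src, size_t n)
    {
      size_t len = strnlen (src, n);      /* bytes to copy, at most n */
      memcpy (dst, src, len);
      memset (dst + len, '\0', n - len);  /* the bl MEMSET call */
      return dst;               /* stpncpy instead returns dst + len */
    }

This is also why the file defines MEMSET to __GI_memset for shared builds: the local call should bind to the internal symbol rather than going through the PLT.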
This is an attempt + to speed up the null-checking process for bigger strings. */ + + ld r12,8(r8) + ldu r11,16(r8) + cmpb r10,r12,r0 + cmpb r9,r11,r0 + or r5,r9,r10 /* Merge everything in one doubleword. */ + cmpldi cr7,r5,0 + bne cr7,L(found) + bdnz L(loop) + + /* We may have one more dword to read. */ + cmpld cr6,r8,r7 + beq cr6,L(end_max) + + ldu r12,8(r8) + cmpb r10,r12,r0 + cmpldi cr6,r10,0 + bne cr6,L(done) + +L(end_max): + mr r3,r4 + blr + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + .align 4 +L(found): + cmpldi cr6,r10,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r9 + addi r8,r8,8 + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the length. + We need to make sure the null char is *before* the end of the + range. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r10,-1 + andc r0,r0,r10 + popcntd r0,r0 +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + sub r3,r8,r3 + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r3,r0 /* Length until the match. */ + cmpld r3,r4 + blelr + mr r3,r4 + blr + +/* Deals with size <= 32. */ + .align 4 +L(small_range): + cmpldi r4,0 + beq L(end_max) + + clrrdi r7,r7,3 /* Address of last doubleword. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + sld r10,r10,r6 +#else + sld r10,r10,r6 + srd r10,r10,r6 +#endif + cmpldi cr7,r10,0 + bne cr7,L(done) + + cmpld r8,r7 + beq L(end_max) + + .p2align 5 +L(loop_small): + ldu r12,8(r8) + cmpb r10,r12,r0 + cmpldi cr6,r10,0 + bne cr6,L(done) + cmpld r8,r7 + bne L(loop_small) + mr r3,r4 + blr + +END (STRNLEN) +libc_hidden_def (__strnlen) +weak_alias (__strnlen, strnlen) +libc_hidden_def (strnlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strrchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strrchr.S new file mode 100644 index 0000000000..c22393deb5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strrchr.S @@ -0,0 +1,260 @@ +/* Optimized strrchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
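On the strnlen routine above, the L(loop_setup) comment derives its iteration count; restated with C names standing in for the registers (start for r8, last for r7):

    /* Each pass of the loop consumes two doublewords (16 bytes), with
       the first load at start + 8 and the last at start + 16 * cnt.  */
    size_t cnt = (last - start) / 16;

When the division truncates, the loop stops one doubleword short of last, which is exactly the case the post-loop cmpld cr6,r8,r7 / ldu r12,8(r8) sequence covers.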
*/ + +#include <sysdep.h> + +/* int [r3] strrchr (char *s [r3], int c [r4]) */ + +#ifndef STRRCHR +# define STRRCHR strrchr +#endif + + .machine power7 +ENTRY (STRRCHR) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r9,0 /* used to store last occurence */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* r4 is changed now ,if its passed as more chars + check for null again */ + cmpdi cr7,r4,0 + beq cr7,L(null_match) + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + +L(align): + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r7,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r7,r4 + cmpb r7,r7,r0 + or r12,r10,r11 + or r5,r6,r7 + or r5,r12,r5 + cmpdi cr7,r5,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + cmpdi cr6,r12,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 + + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ + +L(done): + /* if there are more than one 0xff in r11, find the first pos of ff + in r11 and fill r10 with 0 from that position */ + cmpdi cr7,r11,0 + beq cr7,L(no_null) +#ifdef __LITTLE_ENDIAN__ + addi r3,r11,-1 + andc r3,r3,r11 + popcntd r0,r3 +#else + cntlzd r0,r11 +#endif + subfic r0,r0,63 + li r6,-1 +#ifdef __LITTLE_ENDIAN__ + srd r0,r6,r0 +#else + sld r0,r6,r0 +#endif + and r10,r0,r10 +L(no_null): +#ifdef __LITTLE_ENDIAN__ + cntlzd r0,r10 /* Count leading zeros before c matches. */ + addi r3,r10,-1 + andc r3,r3,r10 + addi r10,r11,-1 + andc r10,r10,r11 + cmpld cr7,r3,r10 + bgt cr7,L(no_match) +#else + addi r3,r10,-1 /* Count trailing zeros before c matches. 
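The strrchr code above is dense because it carries two masks at once, the c matches and the null matches, and in L(done) discards any c match that falls after the terminator. Its observable behavior is the ordinary last-occurrence scan; a plain C model for reference (strrchr_model is local to this note; the assembly keeps the best candidate address in r9 a doubleword at a time instead of a byte at a time):

    static char *
    strrchr_model (const char *s, int c)
    {
      const char *last = 0;             /* r9: last occurrence seen */
      for (;; s++)
        {
          if (*s == (char) c)
            last = s;
          if (*s == '\0')
            /* Searching for '\0' finds the terminator itself, the
               L(null_match) path above.  */
            return (char *) ((char) c == '\0' ? s : last);
        }
    }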
*/ + andc r3,r3,r10 + popcntd r0,r3 + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + subfic r0,r0,7 + add r9,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + li r0,0 + cmpdi cr7,r11,0 /* If r11 == 0, no null's have been found. */ + beq cr7,L(align) + + .align 4 +L(no_match): + mr r3,r9 + blr + +/* We are here because strrchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(loop_null) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +END (STRRCHR) +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strrchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c new file mode 100644 index 0000000000..a917b2157e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c @@ -0,0 +1,27 @@ +/* Optimized strstr implementation for PowerPC64/POWER7. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRSTR __strstr_ppc +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(__name) + +extern __typeof (strstr) __strstr_ppc attribute_hidden; + +#include <string/strstr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr.S new file mode 100644 index 0000000000..260db2ed6d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr.S @@ -0,0 +1,521 @@ +/* Optimized strstr implementation for PowerPC64/POWER7. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Char * [r3] strstr (char *s [r3], char * pat[r4]) */ + +/* The performance gain is obtained using aligned memory access, load + * doubleword and usage of cmpb instruction for quicker comparison. */ + +#define ITERATIONS 64 + +#ifndef STRSTR +# define STRSTR strstr +#endif + +#ifndef STRLEN +/* For builds with no IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRLEN __GI_strlen +# else +# define STRLEN strlen +# endif +#endif + +#ifndef STRNLEN +/* For builds with no IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRNLEN __GI_strnlen +# else +# define STRNLEN __strnlen +# endif +#endif + +#ifndef STRCHR +# ifdef SHARED +# define STRCHR __GI_strchr +# else +# define STRCHR strchr +# endif +#endif + +#define FRAMESIZE (FRAME_MIN_SIZE+32) + .machine power7 +EALIGN (STRSTR, 4, 0) + CALL_MCOUNT 2 + mflr r0 /* Load link register LR to r0. */ + std r31, -8(r1) /* Save callers register r31. */ + std r30, -16(r1) /* Save callers register r30. */ + std r29, -24(r1) /* Save callers register r29. */ + std r28, -32(r1) /* Save callers register r28. */ + std r0, 16(r1) /* Store the link register. */ + cfi_offset(r31, -8) + cfi_offset(r30, -16) + cfi_offset(r28, -32) + cfi_offset(r29, -24) + cfi_offset(lr, 16) + stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */ + cfi_adjust_cfa_offset(FRAMESIZE) + + dcbt 0, r3 + dcbt 0, r4 + cmpdi cr7, r3, 0 + beq cr7, L(retnull) + cmpdi cr7, r4, 0 + beq cr7, L(retnull) + + mr r29, r3 + mr r30, r4 + mr r3, r4 + bl STRLEN + nop + + cmpdi cr7, r3, 0 /* If search str is null. 
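Before the doubleword machinery below, the overall control flow of this strstr is worth stating in one place: measure the needle with strlen, bail out if the haystack is provably shorter (the strnlen call), anchor each attempt with strchr on the needle's first byte, verify the candidate with cmpb window compares, and fall back to the generic __strstr_ppc after ITERATIONS (64) anchored attempts. A hedged C restatement (strstr_model and strstr_generic are invented names, the latter standing in for __strstr_ppc):

    #include <string.h>

    extern char *strstr_generic (const char *, const char *);

    static char *
    strstr_model (const char *hay, const char *needle)
    {
      size_t nlen = strlen (needle);
      if (nlen == 0)
        return (char *) hay;            /* the L(ret_r3) case */
      if (strnlen (hay, nlen) < nlen)   /* haystack shorter than needle */
        return NULL;
      for (int tries = 0; tries < 64; tries++)     /* ITERATIONS cap */
        {
          hay = strchr (hay, needle[0]);           /* anchor attempt */
          if (hay == NULL)
            return NULL;
          if (strncmp (hay, needle, nlen) == 0)    /* cmpb verify    */
            return (char *) hay;
          hay++;
        }
      return strstr_generic (hay, needle);         /* L(default)     */
    }

The page-cross checks near the top (the rldicl compares against 4096-16) divert to the byte-by-byte loop so that the doubleword reads never stray into an unmapped page.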
*/ + beq cr7, L(ret_r3) + + mr r31, r3 + mr r4, r3 + mr r3, r29 + bl STRNLEN + nop + + cmpd cr7, r3, r31 /* If len(r3) < len(r4). */ + blt cr7, L(retnull) + mr r3, r29 + lbz r4, 0(r30) + bl STRCHR + nop + + mr r11, r3 + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) + /* Reg r28 is used to count the number of iterations. */ + li r28, 0 + rldicl r8, r3, 0, 52 /* Page cross check. */ + cmpldi cr7, r8, 4096-16 + bgt cr7, L(bytebybyte) + + rldicl r8, r30, 0, 52 + cmpldi cr7, r8, 4096-16 + bgt cr7, L(bytebybyte) + + /* If len(r4) < 8 handle in a different way. */ + /* Shift position based on null and use cmpb. */ + cmpdi cr7, r31, 8 + blt cr7, L(lessthan8) + + /* Len(r4) >= 8 reaches here. */ + mr r8, r3 /* Save r3 for future use. */ + mr r4, r30 /* Restore r4. */ + li r0, 0 + rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */ + clrrdi r4, r4, 3 /* Make r4 aligned to 8. */ + ld r6, 0(r4) + addi r4, r4, 8 + cmpdi cr7, r10, 0 /* Check if its already aligned? */ + beq cr7, L(begin1) +#ifdef __LITTLE_ENDIAN__ + srd r6, r6, r10 /* Discard unwanted bits. */ +#else + sld r6, r6, r10 +#endif + ld r9, 0(r4) + subfic r10, r10, 64 +#ifdef __LITTLE_ENDIAN__ + sld r9, r9, r10 /* Discard unwanted bits. */ +#else + srd r9, r9, r10 +#endif + or r6, r6, r9 /* Form complete search str. */ +L(begin1): + mr r29, r6 + rlwinm r10, r3, 3, 26, 28 + clrrdi r3, r3, 3 + ld r5, 0(r3) + cmpb r9, r0, r6 /* Check if input has null. */ + cmpdi cr7, r9, 0 + bne cr7, L(return3) + cmpb r9, r0, r5 /* Check if input has null. */ +#ifdef __LITTLE_ENDIAN__ + srd r9, r9, r10 +#else + sld r9, r9, r10 +#endif + cmpdi cr7, r9, 0 + bne cr7, L(retnull) + + li r12, -8 /* Shift values. */ + li r11, 72 /* Shift values. */ + cmpdi cr7, r10, 0 + beq cr7, L(nextbyte1) + mr r12, r10 + addi r12, r12, -8 + subfic r11, r12, 64 + +L(nextbyte1): + ldu r7, 8(r3) /* Load next dw. */ + addi r12, r12, 8 /* Shift one byte and compare. */ + addi r11, r11, -8 +#ifdef __LITTLE_ENDIAN__ + srd r9, r5, r12 /* Rotate based on mask. */ + sld r10, r7, r11 +#else + sld r9, r5, r12 + srd r10, r7, r11 +#endif + /* Form single dw from few bytes on first load and second load. */ + or r10, r9, r10 + /* Check for null in the formed dw. */ + cmpb r9, r0, r10 + cmpdi cr7, r9, 0 + bne cr7, L(retnull) + /* Cmpb search str and input str. */ + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + beq cr7, L(match) + addi r8, r8, 1 + b L(begin) + + .align 4 +L(match): + /* There is a match of 8 bytes, check next bytes. */ + cmpdi cr7, r31, 8 + beq cr7, L(return) + /* Update next starting point r8. */ + srdi r9, r11, 3 + subf r9, r9, r3 + mr r8, r9 + +L(secondmatch): + mr r5, r7 + rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */ + ld r6, 0(r4) + addi r4, r4, 8 + cmpdi cr7, r10, 0 /* Check if its already aligned? */ + beq cr7, L(proceed3) +#ifdef __LITTLE_ENDIAN__ + srd r6, r6, r10 /* Discard unwanted bits. */ + cmpb r9, r0, r6 + sld r9, r9, r10 +#else + sld r6, r6, r10 + cmpb r9, r0, r6 + srd r9, r9, r10 +#endif + cmpdi cr7, r9, 0 + bne cr7, L(proceed3) + ld r9, 0(r4) + subfic r10, r10, 64 +#ifdef __LITTLE_ENDIAN__ + sld r9, r9, r10 /* Discard unwanted bits. */ +#else + srd r9, r9, r10 +#endif + or r6, r6, r9 /* Form complete search str. 
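The "Form complete search str" pattern just above recurs throughout this file: an unaligned doubleword is synthesized from the two aligned doublewords that straddle it. A little-endian C sketch (merge_unaligned_le is a name local to this note; the already-aligned case is excluded, which is why the assembly branches around the merge when the padding is zero):

    #include <assert.h>
    #include <stdint.h>

    /* 'words' points at the aligned doubleword holding the first byte;
       pad_bits is 8 * (addr & 7), the bit offset of that byte.  */
    static uint64_t
    merge_unaligned_le (const uint64_t *words, unsigned int pad_bits)
    {
      assert (pad_bits > 0 && pad_bits < 64);    /* shift by 64 is UB */
      uint64_t lo = words[0] >> pad_bits;        /* srd: drop leading bytes */
      uint64_t hi = words[1] << (64 - pad_bits); /* sld: pull in next dword */
      return lo | hi;                            /* or: the merged value */
    }

On big-endian the two shift directions swap, matching each #ifdef __LITTLE_ENDIAN__ pair in the assembly.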
*/ + +L(proceed3): + li r7, 0 + addi r3, r3, 8 + cmpb r9, r0, r5 + cmpdi cr7, r9, 0 + bne cr7, L(proceed4) + ld r7, 0(r3) +L(proceed4): +#ifdef __LITTLE_ENDIAN__ + srd r9, r5, r12 + sld r10, r7, r11 +#else + sld r9, r5, r12 + srd r10, r7, r11 +#endif + /* Form single dw with few bytes from first and second load. */ + or r10, r9, r10 + cmpb r9, r0, r6 + cmpdi cr7, r9, 0 + bne cr7, L(return4) + /* Check for null in the formed dw. */ + cmpb r9, r0, r10 + cmpdi cr7, r9, 0 + bne cr7, L(retnull) + /* If the next 8 bytes dont match, start search again. */ + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + bne cr7, L(reset) + /* If the next 8 bytes match, load and compare next 8. */ + b L(secondmatch) + + .align 4 +L(reset): + /* Start the search again. */ + addi r8, r8, 1 + b L(begin) + + .align 4 +L(return3): + /* Count leading zeros and compare partial dw. */ +#ifdef __LITTLE_ENDIAN__ + addi r7, r9, -1 + andc r7, r7, r9 + popcntd r7, r7 + subfic r7, r7, 64 + sld r10, r5, r7 + sld r6, r6, r7 +#else + cntlzd r7, r9 + subfic r7, r7, 64 + srd r10, r5, r7 + srd r6, r6, r7 +#endif + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + addi r8, r8, 1 + /* Start search again if there is no match. */ + bne cr7, L(begin) + /* If the words match, update return values. */ + subfic r7, r7, 64 + srdi r7, r7, 3 + add r3, r3, r7 + subf r3, r31, r3 + b L(end) + + .align 4 +L(return4): + /* Count leading zeros and compare partial dw. */ +#ifdef __LITTLE_ENDIAN__ + addi r7, r9, -1 + andc r7, r7, r9 + popcntd r7, r7 + subfic r7, r7, 64 + sld r10, r10, r7 + sld r6, r6, r7 +#else + cntlzd r7, r9 + subfic r7, r7, 64 + srd r10, r10, r7 + srd r6, r6, r7 +#endif + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + addi r8, r8, 1 + bne cr7, L(begin) + subfic r7, r7, 64 + srdi r11, r11, 3 + subf r3, r11, r3 + srdi r7, r7, 3 + add r3, r3, r7 + subf r3, r31, r3 + b L(end) + + .align 4 +L(begin): + mr r3, r8 + /* When our iterations exceed ITERATIONS,fall back to default. */ + addi r28, r28, 1 + cmpdi cr7, r28, ITERATIONS + beq cr7, L(default) + lbz r4, 0(r30) + bl STRCHR + nop + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) + mr r8, r3 + mr r4, r30 /* Restore r4. */ + li r0, 0 + mr r6, r29 + clrrdi r4, r4, 3 + addi r4, r4, 8 + b L(begin1) + + /* Handle less than 8 search string. */ + .align 4 +L(lessthan8): + mr r4, r3 + mr r9, r30 + li r0, 0 + + rlwinm r10, r9, 3, 26, 28 /* Calculate padding in bits. */ + srdi r8, r10, 3 /* Padding in bytes. */ + clrrdi r9, r9, 3 /* Make r4 aligned to 8. */ + ld r6, 0(r9) + cmpdi cr7, r10, 0 /* Check if its already aligned? */ + beq cr7, L(proceed2) +#ifdef __LITTLE_ENDIAN__ + srd r6, r6, r10 /* Discard unwanted bits. */ +#else + sld r6, r6, r10 +#endif + subfic r8, r8, 8 + cmpd cr7, r8, r31 /* Next load needed? */ + bge cr7, L(proceed2) + ld r7, 8(r9) + subfic r10, r10, 64 +#ifdef __LITTLE_ENDIAN__ + sld r7, r7, r10 /* Discard unwanted bits. */ +#else + srd r7, r7, r10 +#endif + or r6, r6, r7 /* Form complete search str. */ +L(proceed2): + mr r29, r6 + rlwinm r10, r3, 3, 26, 28 + clrrdi r7, r3, 3 /* Make r3 aligned. */ + ld r5, 0(r7) + sldi r8, r31, 3 + subfic r8, r8, 64 +#ifdef __LITTLE_ENDIAN__ + sld r6, r6, r8 + cmpb r9, r0, r5 + srd r9, r9, r10 +#else + srd r6, r6, r8 + cmpb r9, r0, r5 + sld r9, r9, r10 +#endif + cmpdi cr7, r9, 0 + bne cr7, L(noload) + cmpdi cr7, r10, 0 + beq cr7, L(continue) + ld r7, 8(r7) +L(continue1): + mr r12, r10 + addi r12, r12, -8 + subfic r11, r12, 64 + b L(nextbyte) + + .align 4 +L(continue): + ld r7, 8(r7) + li r12, -8 /* Shift values. 
*/ + li r11, 72 /* Shift values. */ +L(nextbyte): + addi r12, r12, 8 /* Mask for rotation. */ + addi r11, r11, -8 +#ifdef __LITTLE_ENDIAN__ + srd r9, r5, r12 + sld r10, r7, r11 + or r10, r9, r10 + sld r10, r10, r8 + cmpb r9, r0, r10 + srd r9, r9, r8 +#else + sld r9, r5, r12 + srd r10, r7, r11 + or r10, r9, r10 + srd r10, r10, r8 + cmpb r9, r0, r10 + sld r9, r9, r8 +#endif + cmpdi cr7, r9, 0 + bne cr7, L(retnull) + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + beq cr7, L(end) + addi r3, r4, 1 + /* When our iterations exceed ITERATIONS,fall back to default. */ + addi r28, r28, 1 + cmpdi cr7, r28, ITERATIONS + beq cr7, L(default) + lbz r4, 0(r30) + bl STRCHR + nop + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) + mr r4, r3 + mr r6, r29 + li r0, 0 + b L(proceed2) + + .align 4 +L(noload): + /* Reached null in r3, so skip next load. */ + li r7, 0 + b L(continue1) + + .align 4 +L(return): + /* Update return values. */ + srdi r9, r11, 3 + subf r3, r9, r3 + b L(end) + + /* Handling byte by byte. */ + .align 4 +L(bytebybyte): + mr r8, r3 + addi r8, r8, -1 +L(loop1): + addi r8, r8, 1 + mr r3, r8 + mr r4, r30 + lbz r6, 0(r4) + cmpdi cr7, r6, 0 + beq cr7, L(updater3) +L(loop): + lbz r5, 0(r3) + cmpdi cr7, r5, 0 + beq cr7, L(retnull) + cmpld cr7, r6, r5 + bne cr7, L(loop1) + addi r3, r3, 1 + addi r4, r4, 1 + lbz r6, 0(r4) + cmpdi cr7, r6, 0 + beq cr7, L(updater3) + b L(loop) + + /* Handling return values. */ + .align 4 +L(updater3): + subf r3, r31, r3 /* Reduce len of r4 from r3. */ + b L(end) + + .align 4 +L(ret_r3): + mr r3, r29 /* Return r3. */ + b L(end) + + .align 4 +L(retnull): + li r3, 0 /* Return NULL. */ + b L(end) + + .align 4 +L(default): + mr r4, r30 + bl __strstr_ppc + nop + + .align 4 +L(end): + addi r1, r1, FRAMESIZE /* Restore stack pointer. */ + cfi_adjust_cfa_offset(-FRAMESIZE) + ld r0, 16(r1) /* Restore the saved link register. */ + ld r28, -32(r1) /* Restore callers save register r28. */ + ld r29, -24(r1) /* Restore callers save register r29. */ + ld r30, -16(r1) /* Restore callers save register r30. */ + ld r31, -8(r1) /* Restore callers save register r31. */ + mtlr r0 /* Branch to link register. */ + blr +END (STRSTR) +libc_hidden_builtin_def (strstr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/sub_n.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/sub_n.S new file mode 100644 index 0000000000..848dad5718 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/sub_n.S @@ -0,0 +1,23 @@ +/* PowerPC64 mpn_lshift -- mpn_add_n/mpn_sub_n -- mpn addition and + subtraction. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
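The sub_n.S that follows is two lines of substance: it defines USE_AS_SUB and includes add_n.S, so a single source body yields both mpn_add_n and mpn_sub_n (the "mpn_lshift" in its header comment reads like a copy-paste slip from another mpn file). For reference, what mpn_sub_n computes, as a hedged C model with an invented name; the shared assembly body presumably selects subtract-with-borrow instructions in place of add-with-carry when USE_AS_SUB is set:

    #include <stddef.h>
    #include <stdint.h>

    /* Limb-wise subtraction with borrow propagation; returns the
       final borrow (0 or 1).  */
    static uint64_t
    mpn_sub_n_model (uint64_t *rp, const uint64_t *up,
                     const uint64_t *vp, size_t n)
    {
      uint64_t borrow = 0;
      for (size_t i = 0; i < n; i++)
        {
          uint64_t u = up[i], v = vp[i];
          rp[i] = u - v - borrow;
          /* Borrow out exactly when u < v + borrow as integers.  */
          borrow = (u < v) || (u == v && borrow);
        }
      return borrow;
    }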
*/ + +#include <sysdep.h> + +#define USE_AS_SUB +#include "add_n.S" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Implies new file mode 100644 index 0000000000..9a5e3c7277 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power7/fpu +powerpc/powerpc64/power7 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Makefile new file mode 100644 index 0000000000..71a59529f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Makefile @@ -0,0 +1,3 @@ +ifeq ($(subdir),string) +sysdep_routines += strcasestr-ppc64 +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/Implies new file mode 100644 index 0000000000..1187cdfb0a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/fpu/ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S new file mode 100644 index 0000000000..4c42926a74 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S @@ -0,0 +1,303 @@ +/* Optimized expf(). PowerPC64/POWER8 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Short algorithm description: + * + * Let K = 64 (table size). + * e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y)) + * where: + * x = m*log(2)/K + y, y in [0.0..log(2)/K] + * m = n*K + j, m,n,j - signed integer, j in [0..K-1] + * values of 2^(j/K) are tabulated as T[j]. + * + * P(y) is a minimax polynomial approximation of expf(y)-1 + * on small interval [0.0..log(2)/K]. + * + * P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as + * z = y*y; P(y) = (P3*z + P1)*z + (P2*z + P0)*y + * + * Special cases: + * expf(NaN) = NaN + * expf(+INF) = +INF + * expf(-INF) = 0 + * expf(x) = 1 for subnormals + * for finite argument, only expf(0)=1 is exact + * expf(x) overflows if x>88.7228317260742190 + * expf(x) underflows if x<-103.972076416015620 + */ + +#define C1 0x42ad496b /* Single precision 125*log(2). */ +#define C2 0x31800000 /* Single precision 2^(-28). */ +#define SP_INF 0x7f800000 /* Single precision Inf. */ +#define SP_EXP_BIAS 0x1fc0 /* Single precision exponent bias. 
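The e_expf.S header above specifies the whole algorithm; a runnable C restatement makes the data flow easier to follow. In this sketch (expf_model is local to this note) exp2() stands in for the .Ttable lookup and expm1() for the minimax polynomial P(y), and the special paths the assembly takes first for NaN, infinities, overflow and underflow are omitted:

    #include <math.h>

    static float
    expf_model (float x)
    {
      double m = nearbyint ((double) x * (64 / M_LN2)); /* m = n*64 + j */
      double y = (double) x - m * (M_LN2 / 64);  /* reduced argument y  */
      int mi = (int) m;
      int j = mi & 63;                /* table index                   */
      int n = mi >> 6;                /* binary exponent (arith shift) */
      double tj = exp2 (j / 64.0);    /* T[j] from .Ttable             */
      double py = expm1 (y);          /* P(y), degree-4 minimax in asm */
      return (float) ldexp (tj * (1.0 + py), n);
    }

The assembly avoids both library calls: m is produced by adding the 2^23 + 2^22 rounding constant (.RS) in floating point, and 2^n is built by adding n straight into an exponent field (the SP_EXP_BIAS arithmetic) instead of calling ldexp.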
*/ + +#define DATA_OFFSET r9 + +/* Implements the function + + float [fp1] expf (float [fp1] x) */ + + .machine power8 +EALIGN(__ieee754_expf, 4, 0) + addis DATA_OFFSET,r2,.Lanchor@toc@ha + addi DATA_OFFSET,DATA_OFFSET,.Lanchor@toc@l + + xscvdpspn v0,v1 + mfvsrd r8,v0 /* r8 = x */ + lfd fp2,(.KLN2-.Lanchor)(DATA_OFFSET) + lfd fp3,(.P2-.Lanchor)(DATA_OFFSET) + rldicl r3,r8,32,33 /* r3 = |x| */ + lis r4,C1@ha /* r4 = 125*log(2) */ + ori r4,r4,C1@l + cmpw r3,r4 + lfd fp5,(.P3-.Lanchor)(DATA_OFFSET) + lfd fp4,(.RS-.Lanchor)(DATA_OFFSET) + fmadd fp2,fp1,fp2,fp4 /* fp2 = x * K/log(2) + (2^23 + 2^22) */ + bge L(special_paths) /* |x| >= 125*log(2) ? */ + + lis r4,C2@ha + ori r4,r4,C2@l + cmpw r3,r4 + blt L(small_args) /* |x| < 2^(-28) ? */ + + /* Main path: here if 2^(-28) <= |x| < 125*log(2) */ + frsp fp6,fp2 + xscvdpsp v2,v2 + mfvsrd r8,v2 + mr r3,r8 /* r3 = m */ + rldicl r8,r8,32,58 /* r8 = j */ + lfs fp4,(.SP_RS-.Lanchor)(DATA_OFFSET) + fsubs fp2,fp6,fp4 /* fp2 = m = x * K/log(2) */ + srdi r3,r3,32 + clrrwi r3,r3,6 /* r3 = n */ + lfd fp6,(.NLN2K-.Lanchor)(DATA_OFFSET) + fmadd fp0,fp2,fp6,fp1 /* fp0 = y = x - m*log(2)/K */ + fmul fp2,fp0,fp0 /* fp2 = z = y^2 */ + lfd fp4,(.P1-.Lanchor)(DATA_OFFSET) + lfd fp6,(.P0-.Lanchor)(DATA_OFFSET) + lis r4,SP_EXP_BIAS@ha + ori r4,r4,SP_EXP_BIAS@l + add r3,r3,r4 + rldic r3,r3,49,1 /* r3 = 2^n */ + fmadd fp4,fp5,fp2,fp4 /* fp4 = P3 * z + P1 */ + fmadd fp6,fp3,fp2,fp6 /* fp6 = P2 * z + P0 */ + mtvsrd v1,r3 + xscvspdp v1,v1 + fmul fp4,fp4,fp2 /* fp4 = (P3 * z + P1)*z */ + fmadd fp0,fp0,fp6,fp4 /* fp0 = P(y) */ + sldi r8,r8,3 /* Access doublewords from T[j]. */ + addi r6,DATA_OFFSET,(.Ttable-.Lanchor) + lfdx fp3,r6,r8 + fmadd fp0,fp0,fp3,fp3 /* fp0 = T[j] * (1 + P(y)) */ + fmul fp1,fp1,fp0 /* fp1 = 2^n * T[j] * (1 + P(y)) */ + frsp fp1,fp1 + blr + + .align 4 +/* x is either underflow, overflow, infinite or NaN. */ +L(special_paths): + srdi r8,r8,32 + rlwinm r8,r8,3,29,29 /* r8 = 0, if x positive. + r8 = 4, otherwise. */ + addi r6,DATA_OFFSET,(.SPRANGE-.Lanchor) + lwzx r4,r6,r8 /* r4 = .SPRANGE[signbit(x)] */ + cmpw r3,r4 + /* |x| <= .SPRANGE[signbit(x)] */ + ble L(near_under_or_overflow) + + lis r4,SP_INF@ha + ori r4,r4,SP_INF@l + cmpw r3,r4 + bge L(arg_inf_or_nan) /* |x| > Infinite ? 
*/
+
+ addi r6,DATA_OFFSET,(.SPLARGE_SMALL-.Lanchor)
+ lfsx fp1,r6,r8
+ fmuls fp1,fp1,fp1
+ blr
+
+
+ .align 4
+L(small_args):
+ /* expf(x) = 1.0, where |x| < 2^(-28). */
+ lfs fp2,(.SPone-.Lanchor)(DATA_OFFSET)
+ fadds fp1,fp1,fp2
+ blr
+
+
+ .align 4
+L(arg_inf_or_nan):
+ bne L(arg_nan)
+
+ /* expf(+INF) = +INF
+    expf(-INF) = 0 */
+ addi r6,DATA_OFFSET,(.INF_ZERO-.Lanchor)
+ lfsx fp1,r6,r8
+ blr
+
+
+ .align 4
+L(arg_nan):
+ /* expf(NaN) = NaN */
+ fadd fp1,fp1,fp1
+ frsp fp1,fp1
+ blr
+
+ .align 4
+L(near_under_or_overflow):
+ frsp fp6,fp2
+ xscvdpsp v2,v2
+ mfvsrd r8,v2
+ mr r3,r8 /* r3 = m */
+ rldicl r8,r8,32,58 /* r8 = j */
+ lfs fp4,(.SP_RS-.Lanchor)(DATA_OFFSET)
+ fsubs fp2,fp6,fp4 /* fp2 = m = x * K/log(2) */
+ srdi r3,r3,32
+ clrrwi r3,r3,6 /* r3 = n */
+ lfd fp6,(.NLN2K-.Lanchor)(DATA_OFFSET)
+ fmadd fp0,fp2,fp6,fp1 /* fp0 = y = x - m*log(2)/K */
+ fmul fp2,fp0,fp0 /* fp2 = z = y^2 */
+ lfd fp4,(.P1-.Lanchor)(DATA_OFFSET)
+ lfd fp6,(.P0-.Lanchor)(DATA_OFFSET)
+ ld r4,(.DP_EXP_BIAS-.Lanchor)(DATA_OFFSET)
+ add r3,r3,r4
+ rldic r3,r3,46,1 /* r3 = 2^n */
+ fmadd fp4,fp5,fp2,fp4 /* fp4 = P3 * z + P1 */
+ fmadd fp6,fp3,fp2,fp6 /* fp6 = P2 * z + P0 */
+ mtvsrd v1,r3
+ fmul fp4,fp4,fp2 /* fp4 = (P3*z + P1)*z */
+ fmadd fp0,fp0,fp6,fp4 /* fp0 = P(y) */
+ sldi r8,r8,3 /* Access doublewords from T[j]. */
+ addi r6,DATA_OFFSET,(.Ttable-.Lanchor)
+ lfdx fp3,r6,r8
+ fmadd fp0,fp0,fp3,fp3 /* fp0 = T[j] * (1 + P(y)) */
+ fmul fp1,fp1,fp0 /* fp1 = 2^n * T[j] * (1 + P(y)) */
+ frsp fp1,fp1
+ blr
+END(__ieee754_expf)
+
+ .section .rodata, "a",@progbits
+.Lanchor:
+ .balign 8
+/* Table T[j] = 2^(j/K). Double precision. */
+.Ttable:
+ .8byte 0x3ff0000000000000
+ .8byte 0x3ff02c9a3e778061
+ .8byte 0x3ff059b0d3158574
+ .8byte 0x3ff0874518759bc8
+ .8byte 0x3ff0b5586cf9890f
+ .8byte 0x3ff0e3ec32d3d1a2
+ .8byte 0x3ff11301d0125b51
+ .8byte 0x3ff1429aaea92de0
+ .8byte 0x3ff172b83c7d517b
+ .8byte 0x3ff1a35beb6fcb75
+ .8byte 0x3ff1d4873168b9aa
+ .8byte 0x3ff2063b88628cd6
+ .8byte 0x3ff2387a6e756238
+ .8byte 0x3ff26b4565e27cdd
+ .8byte 0x3ff29e9df51fdee1
+ .8byte 0x3ff2d285a6e4030b
+ .8byte 0x3ff306fe0a31b715
+ .8byte 0x3ff33c08b26416ff
+ .8byte 0x3ff371a7373aa9cb
+ .8byte 0x3ff3a7db34e59ff7
+ .8byte 0x3ff3dea64c123422
+ .8byte 0x3ff4160a21f72e2a
+ .8byte 0x3ff44e086061892d
+ .8byte 0x3ff486a2b5c13cd0
+ .8byte 0x3ff4bfdad5362a27
+ .8byte 0x3ff4f9b2769d2ca7
+ .8byte 0x3ff5342b569d4f82
+ .8byte 0x3ff56f4736b527da
+ .8byte 0x3ff5ab07dd485429
+ .8byte 0x3ff5e76f15ad2148
+ .8byte 0x3ff6247eb03a5585
+ .8byte 0x3ff6623882552225
+ .8byte 0x3ff6a09e667f3bcd
+ .8byte 0x3ff6dfb23c651a2f
+ .8byte 0x3ff71f75e8ec5f74
+ .8byte 0x3ff75feb564267c9
+ .8byte 0x3ff7a11473eb0187
+ .8byte 0x3ff7e2f336cf4e62
+ .8byte 0x3ff82589994cce13
+ .8byte 0x3ff868d99b4492ed
+ .8byte 0x3ff8ace5422aa0db
+ .8byte 0x3ff8f1ae99157736
+ .8byte 0x3ff93737b0cdc5e5
+ .8byte 0x3ff97d829fde4e50
+ .8byte 0x3ff9c49182a3f090
+ .8byte 0x3ffa0c667b5de565
+ .8byte 0x3ffa5503b23e255d
+ .8byte 0x3ffa9e6b5579fdbf
+ .8byte 0x3ffae89f995ad3ad
+ .8byte 0x3ffb33a2b84f15fb
+ .8byte 0x3ffb7f76f2fb5e47
+ .8byte 0x3ffbcc1e904bc1d2
+ .8byte 0x3ffc199bdd85529c
+ .8byte 0x3ffc67f12e57d14b
+ .8byte 0x3ffcb720dcef9069
+ .8byte 0x3ffd072d4a07897c
+ .8byte 0x3ffd5818dcfba487
+ .8byte 0x3ffda9e603db3285
+ .8byte 0x3ffdfc97337b9b5f
+ .8byte 0x3ffe502ee78b3ff6
+ .8byte 0x3ffea4afa2a490da
+ .8byte 0x3ffefa1bee615a27
+ .8byte 0x3fff50765b6e4540
+ .8byte 0x3fffa7c1819e90d8
+
+.KLN2:
+ .8byte 0x40571547652b82fe /* Double precision K/log(2).
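   The constant is 64/log(2); a quick standalone check (not part of the
   build) that the bit pattern matches:

     #include <math.h>
     #include <stdio.h>
     #include <stdint.h>
     #include <string.h>

     int main (void)
     {
       double kln2 = 64.0 / M_LN2;    // K/log(2) with K = 64
       uint64_t bits;
       memcpy (&bits, &kln2, sizeof bits);
       printf ("0x%016llx\n", (unsigned long long) bits);
       return 0;                      // expect 0x40571547652b82fe
     }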
*/ + +/* Double precision polynomial coefficients. */ +.P0: + .8byte 0x3fefffffffffe7c6 +.P1: + .8byte 0x3fe00000008d6118 +.P2: + .8byte 0x3fc55550da752d4f +.P3: + .8byte 0x3fa56420eb78fa85 + +.RS: + .8byte 0x4168000000000000 /* Double precision 2^23 + 2^22. */ +.NLN2K: + .8byte 0xbf862e42fefa39ef /* Double precision -log(2)/K. */ +.DP_EXP_BIAS: + .8byte 0x000000000000ffc0 /* Double precision exponent bias. */ + + .balign 4 +.SPone: + .4byte 0x3f800000 /* Single precision 1.0. */ +.SP_RS: + .4byte 0x4b400000 /* Single precision 2^23 + 2^22. */ + +.SPRANGE: /* Single precision overflow/underflow bounds. */ + .4byte 0x42b17217 /* if x>this bound, then result overflows. */ + .4byte 0x42cff1b4 /* if x<this bound, then result underflows. */ + +.SPLARGE_SMALL: + .4byte 0x71800000 /* 2^100. */ + .4byte 0x0d800000 /* 2^-100. */ + +.INF_ZERO: + .4byte 0x7f800000 /* Single precision Inf. */ + .4byte 0 /* Single precision zero. */ + +strong_alias (__ieee754_expf, __expf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies new file mode 100644 index 0000000000..7fd86fdf87 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S new file mode 100644 index 0000000000..8dfa0076e0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S @@ -0,0 +1,508 @@ +/* Optimized cosf(). PowerPC64/POWER8 version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#define _ERRNO_H 1 +#include <bits/errno.h> + +#define FRAMESIZE (FRAME_MIN_SIZE+16) + +#define FLOAT_EXPONENT_SHIFT 23 +#define FLOAT_EXPONENT_BIAS 127 +#define INTEGER_BITS 3 + +#define PI_4 0x3f490fdb /* PI/4 */ +#define NINEPI_4 0x40e231d6 /* 9 * PI/4 */ +#define TWO_PN5 0x3d000000 /* 2^-5 */ +#define TWO_PN27 0x32000000 /* 2^-27 */ +#define INFINITY 0x7f800000 +#define TWO_P23 0x4b000000 /* 2^23 */ +#define FX_FRACTION_1_28 0x9249250 /* 0x100000000 / 28 + 1 */ + + /* Implements the function + + float [fp1] cosf (float [fp1] x) */ + + .machine power8 +EALIGN(__cosf, 4, 0) + addis r9,r2,L(anchor)@toc@ha + addi r9,r9,L(anchor)@toc@l + + lis r4,PI_4@h + ori r4,r4,PI_4@l + + xscvdpspn v0,v1 + mfvsrd r8,v0 + rldicl r3,r8,32,33 /* Remove sign bit. */ + + cmpw r3,r4 + bge L(greater_or_equal_pio4) + + lis r4,TWO_PN5@h + ori r4,r4,TWO_PN5@l + + cmpw r3,r4 + blt L(less_2pn5) + + /* Chebyshev polynomial of the form: + * 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). 
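   The fmadd chain below is the Horner form of this polynomial; in C,
   with C[] standing for the L(C0)-L(C4) constants defined at the end of
   the file (a sketch, not the kernel itself):

     // 1.0 + z*(C0 + z*(C1 + z*(C2 + z*(C3 + z*C4)))), z = x*x
     double cos_poly (double x, const double C[5])
     {
       double z = x * x;
       double r = C[3] + z * C[4];
       r = C[2] + z * r;
       r = C[1] + z * r;
       r = C[0] + z * r;
       return 1.0 + z * r;
     }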
*/ + + lfd fp9,(L(C0)-L(anchor))(r9) + lfd fp10,(L(C1)-L(anchor))(r9) + lfd fp11,(L(C2)-L(anchor))(r9) + lfd fp12,(L(C3)-L(anchor))(r9) + lfd fp13,(L(C4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + lfd fp3,(L(DPone)-L(anchor))(r9) + + fmadd fp4,fp2,fp13,fp12 /* C3+x^2*C4 */ + fmadd fp4,fp2,fp4,fp11 /* C2+x^2*(C3+x^2*C4) */ + fmadd fp4,fp2,fp4,fp10 /* C1+x^2*(C2+x^2*(C3+x^2*C4)) */ + fmadd fp4,fp2,fp4,fp9 /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */ + fmadd fp1,fp2,fp4,fp3 /* 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */ + frsp fp1,fp1 /* Round to single precision. */ + + blr + + .balign 16 +L(greater_or_equal_pio4): + lis r4,NINEPI_4@h + ori r4,r4,NINEPI_4@l + cmpw r3,r4 + bge L(greater_or_equal_9pio4) + + /* Calculate quotient of |x|/(PI/4). */ + lfd fp2,(L(invpio4)-L(anchor))(r9) + fabs fp1,fp1 /* |x| */ + fmul fp2,fp1,fp2 /* |x|/(PI/4) */ + fctiduz fp2,fp2 + mfvsrd r3,v2 /* n = |x| mod PI/4 */ + + /* Now use that quotient to find |x| mod (PI/2). */ + addi r7,r3,1 + rldicr r5,r7,2,60 /* ((n+1) >> 1) << 3 */ + addi r6,r9,(L(pio2_table)-L(anchor)) + lfdx fp4,r5,r6 + fsub fp1,fp1,fp4 + + .balign 16 +L(reduced): + /* Now we are in the range -PI/4 to PI/4. */ + + /* Work out if we are in a positive or negative primary interval. */ + addi r7,r7,2 + rldicl r4,r7,62,63 /* ((n+3) >> 2) & 1 */ + + /* Load a 1.0 or -1.0. */ + addi r5,r9,(L(ones)-L(anchor)) + sldi r4,r4,3 + lfdx fp0,r4,r5 + + /* Are we in the primary interval of sin or cos? */ + andi. r4,r7,0x2 + bne L(cos) + + /* Chebyshev polynomial of the form: + x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */ + + lfd fp9,(L(S0)-L(anchor))(r9) + lfd fp10,(L(S1)-L(anchor))(r9) + lfd fp11,(L(S2)-L(anchor))(r9) + lfd fp12,(L(S3)-L(anchor))(r9) + lfd fp13,(L(S4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + fmul fp3,fp2,fp1 /* x^3 */ + + fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */ + fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */ + fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */ + fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */ + fmadd fp4,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */ + fmul fp4,fp4,fp0 /* Add in the sign. */ + frsp fp1,fp4 /* Round to single precision. */ + + blr + + .balign 16 +L(cos): + /* Chebyshev polynomial of the form: + 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). */ + + lfd fp9,(L(C0)-L(anchor))(r9) + lfd fp10,(L(C1)-L(anchor))(r9) + lfd fp11,(L(C2)-L(anchor))(r9) + lfd fp12,(L(C3)-L(anchor))(r9) + lfd fp13,(L(C4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + lfd fp3,(L(DPone)-L(anchor))(r9) + + fmadd fp4,fp2,fp13,fp12 /* C3+x^2*C4 */ + fmadd fp4,fp2,fp4,fp11 /* C2+x^2*(C3+x^2*C4) */ + fmadd fp4,fp2,fp4,fp10 /* C1+x^2*(C2+x^2*(C3+x^2*C4)) */ + fmadd fp4,fp2,fp4,fp9 /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */ + fmadd fp4,fp2,fp4,fp3 /* 1.0 + x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */ + fmul fp4,fp4,fp0 /* Add in the sign. */ + frsp fp1,fp4 /* Round to single precision. */ + + blr + + .balign 16 +L(greater_or_equal_9pio4): + lis r4,INFINITY@h + ori r4,r4,INFINITY@l + cmpw r3,r4 + bge L(inf_or_nan) + + lis r4,TWO_P23@h + ori r4,r4,TWO_P23@l + cmpw r3,r4 + bge L(greater_or_equal_2p23) + + fabs fp1,fp1 /* |x| */ + + /* Calculate quotient of |x|/(PI/4). */ + lfd fp2,(L(invpio4)-L(anchor))(r9) + + lfd fp3,(L(DPone)-L(anchor))(r9) + lfd fp4,(L(DPhalf)-L(anchor))(r9) + fmul fp2,fp1,fp2 /* |x|/(PI/4) */ + friz fp2,fp2 /* n = floor(|x|/(PI/4)) */ + + /* Calculate (n + 1) / 2. 
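   Here n counts octants of |x| and ((n+1)/2)*(pi/2) recenters |x| into
   [-pi/4, pi/4].  A rough single-limb C model of this bookkeeping
   (illustrative; the real code keeps pi/2 split in two limbs for
   accuracy):

     #include <math.h>

     double reduce_octant (double ax, long *n)  // ax = |x|
     {
       *n = (long) floor (ax / (M_PI / 4.0));   // octant count
       double k = (double) ((*n + 1) / 2);      // (n + 1) / 2, truncated
       return ax - k * (M_PI / 2.0);            // into [-pi/4, pi/4]
     }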
*/
+ fadd fp2,fp2,fp3 /* n + 1 */
+ fmul fp3,fp2,fp4 /* (n + 1) / 2 */
+ friz fp3,fp3
+
+ lfd fp4,(L(pio2hi)-L(anchor))(r9)
+ lfd fp5,(L(pio2lo)-L(anchor))(r9)
+
+ fmul fp6,fp4,fp3
+ fadd fp6,fp6,fp1
+ fmadd fp1,fp5,fp3,fp6
+
+ fctiduz fp2,fp2
+ mfvsrd r7,v2 /* n + 1 */
+
+ b L(reduced)
+
+ .balign 16
+L(inf_or_nan):
+ bne L(skip_errno_setting) /* Is a NAN? */
+
+ /* We delayed the creation of the stack frame, as well as the saving of
+    the link register, because only at this point, we are sure that
+    doing so is actually needed. */
+
+ stfd fp1,-8(r1)
+
+ /* Save the link register. */
+ mflr r0
+ std r0,16(r1)
+ cfi_offset(lr, 16)
+
+ /* Create the stack frame. */
+ stdu r1,-FRAMESIZE(r1)
+ cfi_adjust_cfa_offset(FRAMESIZE)
+
+ bl JUMPTARGET(__errno_location)
+ nop
+
+ /* Restore the stack frame. */
+ addi r1,r1,FRAMESIZE
+ cfi_adjust_cfa_offset(-FRAMESIZE)
+ /* Restore the link register. */
+ ld r0,16(r1)
+ mtlr r0
+
+ lfd fp1,-8(r1)
+
+ /* errno = EDOM */
+ li r4,EDOM
+ stw r4,0(r3)
+
+L(skip_errno_setting):
+ fsub fp1,fp1,fp1 /* x - x */
+ blr
+
+ .balign 16
+L(greater_or_equal_2p23):
+ fabs fp1,fp1
+
+ srwi r4,r3,FLOAT_EXPONENT_SHIFT
+ subi r4,r4,FLOAT_EXPONENT_BIAS
+
+ /* We reduce the input modulo pi/4, so we need 3 bits of integer
+    to determine where in 2*pi we are. Index into our array
+    accordingly. */
+ addi r4,r4,INTEGER_BITS
+
+ /* To avoid an expensive divide, for the range we care about (0 - 127)
+    we can transform x/28 into:
+
+    x/28 = (x * ((0x100000000 / 28) + 1)) >> 32
+
+    mulhwu returns the top 32 bits of the 64 bit result, doing the
+    shift for us in the same instruction. The top 32 bits are undefined,
+    so we have to mask them. */
+
+ lis r6,FX_FRACTION_1_28@h
+ ori r6,r6,FX_FRACTION_1_28@l
+ mulhwu r5,r4,r6
+ clrldi r5,r5,32
+
+ /* Get our pointer into the invpio4_table array. */
+ sldi r4,r5,3
+ addi r6,r9,(L(invpio4_table)-L(anchor))
+ add r4,r4,r6
+
+ lfd fp2,0(r4)
+ lfd fp3,8(r4)
+ lfd fp4,16(r4)
+ lfd fp5,24(r4)
+
+ fmul fp6,fp2,fp1
+ fmul fp7,fp3,fp1
+ fmul fp8,fp4,fp1
+ fmul fp9,fp5,fp1
+
+ /* Mask off larger integer bits in highest double word that we don't
+    care about to avoid losing precision when combining with smaller
+    values. */
+ fctiduz fp10,fp6
+ mfvsrd r7,v10
+ rldicr r7,r7,0,(63-INTEGER_BITS)
+ mtvsrd v10,r7
+ fcfidu fp10,fp10 /* Integer bits. */
+
+ fsub fp6,fp6,fp10 /* highest -= integer bits */
+
+ /* Work out the integer component, rounded down. Use the top two
+    limbs for this. */
+ fadd fp10,fp6,fp7 /* highest + higher */
+
+ fctiduz fp10,fp10
+ mfvsrd r7,v10
+ andi. r0,r7,1
+ fcfidu fp10,fp10
+
+ /* Subtract integer component from highest limb. */
+ fsub fp12,fp6,fp10
+
+ beq L(even_integer)
+
+ /* Our integer component is odd, so we are in the -PI/4 to 0 primary
+    region. We need to shift our result down by PI/4, and to do this
+    in the mod (4/PI) space we simply subtract 1. */
+ lfd fp11,(L(DPone)-L(anchor))(r9)
+ fsub fp12,fp12,fp11
+
+ /* Now add up all the limbs in order. */
+ fadd fp12,fp12,fp7
+ fadd fp12,fp12,fp8
+ fadd fp12,fp12,fp9
+
+ /* And finally multiply by pi/4. */
+ lfd fp13,(L(pio4)-L(anchor))(r9)
+ fmul fp1,fp12,fp13
+
+ addi r7,r7,1
+ b L(reduced)
+
+L(even_integer):
+ lfd fp11,(L(DPone)-L(anchor))(r9)
+
+ /* Now add up all the limbs in order. */
+ fadd fp12,fp12,fp7
+ fadd fp12,fp12,fp8
+ fadd fp12,fp12,fp9
+
+ /* We need to check if the addition of all the limbs resulted in us
+    overflowing 1.0. */
+ fcmpu 0,fp12,fp11
+ bgt L(greater_than_one)
+
+ /* And finally multiply by pi/4.
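   A toy scalar model of the limb scheme above may help; nothing here is
   the code glibc runs, and huge_reduce is an invented name.  It uses
   INTEGER_BITS = 3 (mod 8 is enough to know the octant) and the
   L(invpio4_table) limbs of 4/pi:

     #include <math.h>

     double huge_reduce (double ax, const double limbs[4])
     {
       double hi = ax * limbs[0];
       // Keep only the low integer bits of the highest product,
       // mirroring the fctiduz/rldicr/fcfidu trio.
       hi -= floor (hi / 8.0) * 8.0;
       double f = hi + ax * limbs[1] + ax * limbs[2] + ax * limbs[3];
       double n = floor (f);                   // octant count
       return (f - n) * 0x1.921fb54442d18p-1;  // scale back by pi/4
     }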
*/ + lfd fp13,(L(pio4)-L(anchor))(r9) + fmul fp1,fp12,fp13 + + addi r7,r7,1 + b L(reduced) + +L(greater_than_one): + /* We did overflow 1.0 when adding up all the limbs. Add 1.0 to our + integer, and subtract 1.0 from our result. Since that makes the + integer component odd, we need to subtract another 1.0 as + explained above. */ + addi r7,r7,1 + + lfd fp11,(L(DPtwo)-L(anchor))(r9) + fsub fp12,fp12,fp11 + + /* And finally multiply by pi/4. */ + lfd fp13,(L(pio4)-L(anchor))(r9) + fmul fp1,fp12,fp13 + + addi r7,r7,1 + b L(reduced) + + .balign 16 +L(less_2pn5): + lis r4,TWO_PN27@h + ori r4,r4,TWO_PN27@l + + cmpw r3,r4 + blt L(less_2pn27) + + /* A simpler Chebyshev approximation is close enough for this range: + 1.0+x^2*(CC0+x^3*CC1). */ + + lfd fp10,(L(CC0)-L(anchor))(r9) + lfd fp11,(L(CC1)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + fmul fp3,fp2,fp1 /* x^3 */ + lfd fp1,(L(DPone)-L(anchor))(r9) + + fmadd fp4,fp3,fp11,fp10 /* CC0+x^3*CC1 */ + fmadd fp1,fp2,fp4,fp1 /* 1.0+x^2*(CC0+x^3*CC1) */ + + frsp fp1,fp1 /* Round to single precision. */ + + blr + + .balign 16 +L(less_2pn27): + /* Handle some special cases: + + cosf(subnormal) raises inexact + cosf(min_normalized) raises inexact + cosf(normalized) raises inexact. */ + + lfd fp2,(L(DPone)-L(anchor))(r9) + + fabs fp1,fp1 /* |x| */ + fsub fp1,fp2,fp1 /* 1.0-|x| */ + + frsp fp1,fp1 + + blr + +END (__cosf) + + .section .rodata, "a" + + .balign 8 + +L(anchor): + + /* Chebyshev constants for sin, range -PI/4 - PI/4. */ +L(S0): .8byte 0xbfc5555555551cd9 +L(S1): .8byte 0x3f81111110c2688b +L(S2): .8byte 0xbf2a019f8b4bd1f9 +L(S3): .8byte 0x3ec71d7264e6b5b4 +L(S4): .8byte 0xbe5a947e1674b58a + + /* Chebyshev constants for cos, range 2^-27 - 2^-5. */ +L(CC0): .8byte 0xbfdfffffff5cc6fd +L(CC1): .8byte 0x3fa55514b178dac5 + + /* Chebyshev constants for cos, range -PI/4 - PI/4. */ +L(C0): .8byte 0xbfdffffffffe98ae +L(C1): .8byte 0x3fa55555545c50c7 +L(C2): .8byte 0xbf56c16b348b6874 +L(C3): .8byte 0x3efa00eb9ac43cc0 +L(C4): .8byte 0xbe923c97dd8844d7 + +L(invpio2): + .8byte 0x3fe45f306dc9c883 /* 2/PI */ + +L(invpio4): + .8byte 0x3ff45f306dc9c883 /* 4/PI */ + +L(invpio4_table): + .8byte 0x0000000000000000 + .8byte 0x3ff45f306c000000 + .8byte 0x3e3c9c882a000000 + .8byte 0x3c54fe13a8000000 + .8byte 0x3aaf47d4d0000000 + .8byte 0x38fbb81b6c000000 + .8byte 0x3714acc9e0000000 + .8byte 0x3560e4107c000000 + .8byte 0x33bca2c756000000 + .8byte 0x31fbd778ac000000 + .8byte 0x300b7246e0000000 + .8byte 0x2e5d2126e8000000 + .8byte 0x2c97003248000000 + .8byte 0x2ad77504e8000000 + .8byte 0x290921cfe0000000 + .8byte 0x274deb1cb0000000 + .8byte 0x25829a73e0000000 + .8byte 0x23fd1046be000000 + .8byte 0x2224baed10000000 + .8byte 0x20709d338e000000 + .8byte 0x1e535a2f80000000 + .8byte 0x1cef904e64000000 + .8byte 0x1b0d639830000000 + .8byte 0x1964ce7d24000000 + .8byte 0x17b908bf16000000 + +L(pio4): + .8byte 0x3fe921fb54442d18 /* PI/4 */ + +/* PI/2 as a sum of two doubles. We only use 32 bits of the upper limb + to avoid losing significant bits when multiplying with up to + (2^22)/(pi/2). 
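   In C this step is a compensated multiply-subtract; with the two limbs
   below written as the equivalent hex-float literals, a sketch is:

     // x + k*pio2hi + k*pio2lo, matching the fmul/fadd/fmadd trio in
     // L(greater_or_equal_9pio4); both limbs are stored negative.
     double sub_k_pio2 (double x, double k)
     {
       double pio2hi = -0x1.921fb544p+0;         // 0xbff921fb54400000
       double pio2lo = -0x1.0b4611a626332p-34;   // 0xbdd0b4611a626332
       return (x + k * pio2hi) + k * pio2lo;
     }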
*/ +L(pio2hi): + .8byte 0xbff921fb54400000 + +L(pio2lo): + .8byte 0xbdd0b4611a626332 + +L(pio2_table): + .8byte 0 + .8byte 0x3ff921fb54442d18 /* 1 * PI/2 */ + .8byte 0x400921fb54442d18 /* 2 * PI/2 */ + .8byte 0x4012d97c7f3321d2 /* 3 * PI/2 */ + .8byte 0x401921fb54442d18 /* 4 * PI/2 */ + .8byte 0x401f6a7a2955385e /* 5 * PI/2 */ + .8byte 0x4022d97c7f3321d2 /* 6 * PI/2 */ + .8byte 0x4025fdbbe9bba775 /* 7 * PI/2 */ + .8byte 0x402921fb54442d18 /* 8 * PI/2 */ + .8byte 0x402c463abeccb2bb /* 9 * PI/2 */ + .8byte 0x402f6a7a2955385e /* 10 * PI/2 */ + +L(small): + .8byte 0x3cd0000000000000 /* 2^-50 */ + +L(ones): + .8byte 0x3ff0000000000000 /* +1.0 */ + .8byte 0xbff0000000000000 /* -1.0 */ + +L(DPhalf): + .8byte 0x3fe0000000000000 /* 0.5 */ + +L(DPone): + .8byte 0x3ff0000000000000 /* 1.0 */ + +L(DPtwo): + .8byte 0x4000000000000000 /* 2.0 */ + +weak_alias(__cosf, cosf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S new file mode 100644 index 0000000000..fcdcb60293 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S @@ -0,0 +1,56 @@ +/* isfinite(). PowerPC64/POWER8 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#define MFVSRD_R3_V1 .long 0x7c230066 /* mfvsrd r3,vs1 */ + +/* int [r3] __finite ([fp1] x) */ + +EALIGN (__finite, 4, 0) + CALL_MCOUNT 0 + MFVSRD_R3_V1 + lis r9,0x8010 + clrldi r3,r3,1 /* r3 = r3 & 0x8000000000000000 */ + rldicr r9,r9,32,31 /* r9 = (r9 << 32) & 0xffffffff */ + add r3,r3,r9 + rldicl r3,r3,1,63 + blr +END (__finite) + +hidden_def (__finite) +weak_alias (__finite, finite) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__finite, __finitef) +hidden_def (__finitef) +weak_alias (__finitef, finitef) + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, __finite, __finitel, GLIBC_2_0) +compat_symbol (libm, finite, finitel, GLIBC_2_0) +# endif +#else +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __finite, __finitel, GLIBC_2_0); +compat_symbol (libc, finite, finitel, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S new file mode 100644 index 0000000000..54bd94176d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S @@ -0,0 +1 @@ +/* This function uses the same code as s_finite.S. 
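   A branchless C model of the test s_finite.S performs (illustrative;
   memcpy stands in for the MFVSRD register move):

     #include <stdint.h>
     #include <string.h>

     int finite_model (double x)
     {
       uint64_t u;
       memcpy (&u, &x, sizeof u);
       u &= ~(1ULL << 63);             // clrldi: drop the sign bit
       u += 0x8010000000000000ULL;     // wraps only when exp == 0x7ff
       return (int) (u >> 63);         // 1 if finite, 0 if inf/NaN
     }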
*/ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S new file mode 100644 index 0000000000..32814e4525 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S @@ -0,0 +1,61 @@ +/* isinf(). PowerPC64/POWER8 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#define MFVSRD_R3_V1 .long 0x7c230066 /* mfvsrd r3,vs1 */ + +/* int [r3] __isinf([fp1] x) */ + +EALIGN (__isinf, 4, 0) + CALL_MCOUNT 0 + MFVSRD_R3_V1 + lis r9,0x7ff0 /* r9 = 0x7ff0 */ + rldicl r10,r3,0,1 /* r10 = r3 & (0x8000000000000000) */ + sldi r9,r9,32 /* r9 = r9 << 52 */ + cmpd cr7,r10,r9 /* fp1 & 0x7ff0000000000000 ? */ + beq cr7,L(inf) + li r3,0 /* Not inf */ + blr +L(inf): + sradi r3,r3,63 /* r3 = r3 >> 63 */ + ori r3,r3,1 /* r3 = r3 | 0x1 */ + blr +END (__isinf) + +hidden_def (__isinf) +weak_alias (__isinf, isinf) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isinf, __isinff) +hidden_def (__isinff) +weak_alias (__isinff, isinff) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isinf, __isinfl) +weak_alias (__isinf, isinfl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0); +compat_symbol (libc, isinf, isinfl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S new file mode 100644 index 0000000000..be759e091e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isinf.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S new file mode 100644 index 0000000000..af52e502b7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S @@ -0,0 +1,56 @@ +/* isnan(). PowerPC64/POWER8 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#define MFVSRD_R3_V1 .long 0x7c230066 /* mfvsrd r3,vs1 */ + +/* int [r3] __isnan([f1] x) */ + +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + MFVSRD_R3_V1 + lis r9,0x7ff0 + clrldi r3,r3,1 /* r3 = r3 & 0x8000000000000000 */ + rldicr r9,r9,32,31 /* r9 = (r9 << 32) & 0xffffffff */ + subf r3,r3,r9 + rldicl r3,r3,1,63 + blr +END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S new file mode 100644 index 0000000000..b48c85e0d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isnan.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S new file mode 100644 index 0000000000..aa180b6901 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S @@ -0,0 +1,45 @@ +/* Round double to long int. POWER8 PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#define MFVSRD_R3_V1 .long 0x7c230066 /* mfvsrd r3,vs1 */ + +/* long long int[r3] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT 0 + fctid fp1,fp1 + MFVSRD_R3_V1 + blr +END (__llrint) + +strong_alias (__llrint, __lrint) +weak_alias (__llrint, llrint) +weak_alias (__lrint, lrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S new file mode 100644 index 0000000000..043fc6a089 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S @@ -0,0 +1,48 @@ +/* llround function. POWER8 PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <endian.h>
+#include <math_ldbl_opt.h>
+
+#define MFVSRD_R3_V1 .long 0x7c230066 /* mfvsrd r3,vs1 */
+
+/* long long [r3] llround (float x [fp1]) */
+
+ENTRY (__llround)
+ CALL_MCOUNT 0
+ frin fp1,fp1 /* Round to nearest +-0.5. */
+ fctidz fp1,fp1 /* Convert To Integer DW round toward 0. */
+ MFVSRD_R3_V1
+ blr
+END (__llround)
+
+strong_alias (__llround, __lround)
+weak_alias (__llround, llround)
+weak_alias (__lround, lround)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S
new file mode 100644
index 0000000000..fb0add3462
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S
@@ -0,0 +1,519 @@
+/* Optimized sinf(). PowerPC64/POWER8 version.
+ Copyright (C) 2016-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#define _ERRNO_H 1
+#include <bits/errno.h>
+
+#define FRAMESIZE (FRAME_MIN_SIZE+16)
+
+#define FLOAT_EXPONENT_SHIFT 23
+#define FLOAT_EXPONENT_BIAS 127
+#define INTEGER_BITS 3
+
+#define PI_4 0x3f490fdb /* PI/4 */
+#define NINEPI_4 0x40e231d6 /* 9 * PI/4 */
+#define TWO_PN5 0x3d000000 /* 2^-5 */
+#define TWO_PN27 0x32000000 /* 2^-27 */
+#define INFINITY 0x7f800000
+#define TWO_P23 0x4b000000 /* 2^23 */
+#define FX_FRACTION_1_28 0x9249250 /* 0x100000000 / 28 + 1 */
+
+ /* Implements the function
+
+    float [fp1] sinf (float [fp1] x) */
+
+ .machine power8
+EALIGN(__sinf, 4, 0)
+ addis r9,r2,L(anchor)@toc@ha
+ addi r9,r9,L(anchor)@toc@l
+
+ lis r4,PI_4@h
+ ori r4,r4,PI_4@l
+
+ xscvdpspn v0,v1
+ mfvsrd r8,v0
+ rldicl r3,r8,32,33 /* Remove sign bit.
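   The xscvdpspn/mfvsrd/rldicl trio above just moves the single-precision
   bit pattern into a GPR and strips the sign; an equivalent C fragment
   (a model, not the generated code):

     #include <stdint.h>
     #include <string.h>

     uint32_t abs_bits (float x)
     {
       uint32_t u;
       memcpy (&u, &x, sizeof u);   // raw bits of x
       return u & 0x7fffffff;       // clear the sign, like r3 here
     }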
*/ + + cmpw r3,r4 + bge L(greater_or_equal_pio4) + + lis r4,TWO_PN5@h + ori r4,r4,TWO_PN5@l + + cmpw r3,r4 + blt L(less_2pn5) + + /* Chebyshev polynomial of the form: + * x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */ + + lfd fp9,(L(S0)-L(anchor))(r9) + lfd fp10,(L(S1)-L(anchor))(r9) + lfd fp11,(L(S2)-L(anchor))(r9) + lfd fp12,(L(S3)-L(anchor))(r9) + lfd fp13,(L(S4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + fmul fp3,fp2,fp1 /* x^3 */ + + fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */ + fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */ + fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */ + fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */ + fmadd fp1,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */ + frsp fp1,fp1 /* Round to single precision. */ + + blr + + .balign 16 +L(greater_or_equal_pio4): + lis r4,NINEPI_4@h + ori r4,r4,NINEPI_4@l + cmpw r3,r4 + bge L(greater_or_equal_9pio4) + + /* Calculate quotient of |x|/(PI/4). */ + lfd fp2,(L(invpio4)-L(anchor))(r9) + fabs fp1,fp1 /* |x| */ + fmul fp2,fp1,fp2 /* |x|/(PI/4) */ + fctiduz fp2,fp2 + mfvsrd r3,v2 /* n = |x| mod PI/4 */ + + /* Now use that quotient to find |x| mod (PI/2). */ + addi r7,r3,1 + rldicr r5,r7,2,60 /* ((n+1) >> 1) << 3 */ + addi r6,r9,(L(pio2_table)-L(anchor)) + lfdx fp4,r5,r6 + fsub fp1,fp1,fp4 + + .balign 16 +L(reduced): + /* Now we are in the range -PI/4 to PI/4. */ + + /* Work out if we are in a positive or negative primary interval. */ + rldicl r4,r7,62,63 /* ((n+1) >> 2) & 1 */ + + /* We are operating on |x|, so we need to add back the original + sign. */ + rldicl r8,r8,33,63 /* (x >> 31) & 1, ie the sign bit. */ + xor r4,r4,r8 /* 0 if result should be positive, + 1 if negative. */ + + /* Load a 1.0 or -1.0. */ + addi r5,r9,(L(ones)-L(anchor)) + sldi r4,r4,3 + lfdx fp0,r4,r5 + + /* Are we in the primary interval of sin or cos? */ + andi. r4,r7,0x2 + bne L(cos) + + /* Chebyshev polynomial of the form: + x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */ + + lfd fp9,(L(S0)-L(anchor))(r9) + lfd fp10,(L(S1)-L(anchor))(r9) + lfd fp11,(L(S2)-L(anchor))(r9) + lfd fp12,(L(S3)-L(anchor))(r9) + lfd fp13,(L(S4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + fmul fp3,fp2,fp1 /* x^3 */ + + fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */ + fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */ + fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */ + fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */ + fmadd fp4,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */ + fmul fp4,fp4,fp0 /* Add in the sign. */ + frsp fp1,fp4 /* Round to single precision. */ + + blr + + .balign 16 +L(cos): + /* Chebyshev polynomial of the form: + 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). */ + + lfd fp9,(L(C0)-L(anchor))(r9) + lfd fp10,(L(C1)-L(anchor))(r9) + lfd fp11,(L(C2)-L(anchor))(r9) + lfd fp12,(L(C3)-L(anchor))(r9) + lfd fp13,(L(C4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + lfd fp3,(L(DPone)-L(anchor))(r9) + + fmadd fp4,fp2,fp13,fp12 /* C3+x^2*C4 */ + fmadd fp4,fp2,fp4,fp11 /* C2+x^2*(C3+x^2*C4) */ + fmadd fp4,fp2,fp4,fp10 /* C1+x^2*(C2+x^2*(C3+x^2*C4)) */ + fmadd fp4,fp2,fp4,fp9 /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */ + fmadd fp4,fp2,fp4,fp3 /* 1.0 + x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */ + fmul fp4,fp4,fp0 /* Add in the sign. */ + frsp fp1,fp4 /* Round to single precision. 
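   The sign handling in L(reduced) above reduces to one parity bit XORed
   with the argument's sign; in C (pick_sign is an invented name):

     // nplus1 is the octant count already incremented by one (r7);
     // sign is the original sign bit of x.
     double pick_sign (long nplus1, int sign)
     {
       static const double ones[2] = { 1.0, -1.0 };  // L(ones)
       return ones[((nplus1 >> 2) & 1) ^ (sign & 1)];
     }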
*/ + + blr + + .balign 16 +L(greater_or_equal_9pio4): + lis r4,INFINITY@h + ori r4,r4,INFINITY@l + cmpw r3,r4 + bge L(inf_or_nan) + + lis r4,TWO_P23@h + ori r4,r4,TWO_P23@l + cmpw r3,r4 + bge L(greater_or_equal_2p23) + + fabs fp1,fp1 /* |x| */ + + /* Calculate quotient of |x|/(PI/4). */ + lfd fp2,(L(invpio4)-L(anchor))(r9) + + lfd fp3,(L(DPone)-L(anchor))(r9) + lfd fp4,(L(DPhalf)-L(anchor))(r9) + fmul fp2,fp1,fp2 /* |x|/(PI/4) */ + friz fp2,fp2 /* n = floor(|x|/(PI/4)) */ + + /* Calculate (n + 1) / 2. */ + fadd fp2,fp2,fp3 /* n + 1 */ + fmul fp3,fp2,fp4 /* (n + 1) / 2 */ + friz fp3,fp3 + + lfd fp4,(L(pio2hi)-L(anchor))(r9) + lfd fp5,(L(pio2lo)-L(anchor))(r9) + + fmul fp6,fp4,fp3 + fadd fp6,fp6,fp1 + fmadd fp1,fp5,fp3,fp6 + + fctiduz fp2,fp2 + mfvsrd r7,v2 /* n + 1 */ + + b L(reduced) + + .balign 16 +L(inf_or_nan): + bne L(skip_errno_setting) /* Is a NAN? */ + + /* We delayed the creation of the stack frame, as well as the saving of + the link register, because only at this point, we are sure that + doing so is actually needed. */ + + stfd fp1,-8(r1) + + /* Save the link register. */ + mflr r0 + std r0,16(r1) + cfi_offset(lr, 16) + + /* Create the stack frame. */ + stdu r1,-FRAMESIZE(r1) + cfi_adjust_cfa_offset(FRAMESIZE) + + bl JUMPTARGET(__errno_location) + nop + + /* Restore the stack frame. */ + addi r1,r1,FRAMESIZE + cfi_adjust_cfa_offset(-FRAMESIZE) + /* Restore the link register. */ + ld r0,16(r1) + mtlr r0 + + lfd fp1,-8(r1) + + /* errno = EDOM */ + li r4,EDOM + stw r4,0(r3) + +L(skip_errno_setting): + fsub fp1,fp1,fp1 /* x - x */ + blr + + .balign 16 +L(greater_or_equal_2p23): + fabs fp1,fp1 + + srwi r4,r3,FLOAT_EXPONENT_SHIFT + subi r4,r4,FLOAT_EXPONENT_BIAS + + /* We reduce the input modulo pi/4, so we need 3 bits of integer + to determine where in 2*pi we are. Index into our array + accordingly. */ + addi r4,r4,INTEGER_BITS + + /* To avoid an expensive divide, for the range we care about (0 - 127) + we can transform x/28 into: + + x/28 = (x * ((0x100000000 / 28) + 1)) >> 32 + + mulhwu returns the top 32 bits of the 64 bit result, doing the + shift for us in the same instruction. The top 32 bits are undefined, + so we have to mask them. */ + + lis r6,FX_FRACTION_1_28@h + ori r6,r6,FX_FRACTION_1_28@l + mulhwu r5,r4,r6 + clrldi r5,r5,32 + + /* Get our pointer into the invpio4_table array. */ + sldi r4,r5,3 + addi r6,r9,(L(invpio4_table)-L(anchor)) + add r4,r4,r6 + + lfd fp2,0(r4) + lfd fp3,8(r4) + lfd fp4,16(r4) + lfd fp5,24(r4) + + fmul fp6,fp2,fp1 + fmul fp7,fp3,fp1 + fmul fp8,fp4,fp1 + fmul fp9,fp5,fp1 + + /* Mask off larger integer bits in highest double word that we don't + care about to avoid losing precision when combining with smaller + values. */ + fctiduz fp10,fp6 + mfvsrd r7,v10 + rldicr r7,r7,0,(63-INTEGER_BITS) + mtvsrd v10,r7 + fcfidu fp10,fp10 /* Integer bits. */ + + fsub fp6,fp6,fp10 /* highest -= integer bits */ + + /* Work out the integer component, rounded down. Use the top two + limbs for this. */ + fadd fp10,fp6,fp7 /* highest + higher */ + + fctiduz fp10,fp10 + mfvsrd r7,v10 + andi. r0,r7,1 + fcfidu fp10,fp10 + + /* Subtract integer component from highest limb. */ + fsub fp12,fp6,fp10 + + beq L(even_integer) + + /* Our integer component is odd, so we are in the -PI/4 to 0 primary + region. We need to shift our result down by PI/4, and to do this + in the mod (4/PI) space we simply subtract 1. */ + lfd fp11,(L(DPone)-L(anchor))(r9) + fsub fp12,fp12,fp11 + + /* Now add up all the limbs in order. 
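   The reciprocal-multiply identity quoted above is easy to verify
   exhaustively for the 0-127 range this path can see (standalone check,
   not glibc code):

     #include <assert.h>

     int main (void)
     {
       for (unsigned x = 0; x <= 127; x++)
         {
           // FX_FRACTION_1_28 and the mulhwu high-word trick
           unsigned long long p = (unsigned long long) x * 0x9249250u;
           assert ((unsigned) (p >> 32) == x / 28);
         }
       return 0;
     }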
*/
+ fadd fp12,fp12,fp7
+ fadd fp12,fp12,fp8
+ fadd fp12,fp12,fp9
+
+ /* And finally multiply by pi/4. */
+ lfd fp13,(L(pio4)-L(anchor))(r9)
+ fmul fp1,fp12,fp13
+
+ addi r7,r7,1
+ b L(reduced)
+
+L(even_integer):
+ lfd fp11,(L(DPone)-L(anchor))(r9)
+
+ /* Now add up all the limbs in order. */
+ fadd fp12,fp12,fp7
+ fadd fp12,fp12,fp8
+ fadd fp12,fp12,fp9
+
+ /* We need to check if the addition of all the limbs resulted in us
+    overflowing 1.0. */
+ fcmpu 0,fp12,fp11
+ bgt L(greater_than_one)
+
+ /* And finally multiply by pi/4. */
+ lfd fp13,(L(pio4)-L(anchor))(r9)
+ fmul fp1,fp12,fp13
+
+ addi r7,r7,1
+ b L(reduced)
+
+L(greater_than_one):
+ /* We did overflow 1.0 when adding up all the limbs. Add 1.0 to our
+    integer, and subtract 1.0 from our result. Since that makes the
+    integer component odd, we need to subtract another 1.0 as
+    explained above. */
+ addi r7,r7,1
+
+ lfd fp11,(L(DPtwo)-L(anchor))(r9)
+ fsub fp12,fp12,fp11
+
+ /* And finally multiply by pi/4. */
+ lfd fp13,(L(pio4)-L(anchor))(r9)
+ fmul fp1,fp12,fp13
+
+ addi r7,r7,1
+ b L(reduced)
+
+ .balign 16
+L(less_2pn5):
+ lis r4,TWO_PN27@h
+ ori r4,r4,TWO_PN27@l
+
+ cmpw r3,r4
+ blt L(less_2pn27)
+
+ /* A simpler Chebyshev approximation is close enough for this range:
+    x+x^3*(SS0+x^2*SS1). */
+
+ lfd fp10,(L(SS0)-L(anchor))(r9)
+ lfd fp11,(L(SS1)-L(anchor))(r9)
+
+ fmul fp2,fp1,fp1 /* x^2 */
+ fmul fp3,fp2,fp1 /* x^3 */
+
+ fmadd fp4,fp2,fp11,fp10 /* SS0+x^2*SS1 */
+ fmadd fp1,fp3,fp4,fp1 /* x+x^3*(SS0+x^2*SS1) */
+
+ frsp fp1,fp1 /* Round to single precision. */
+
+ blr
+
+ .balign 16
+L(less_2pn27):
+ cmpwi r3,0
+ beq L(zero)
+
+ /* Handle some special cases:
+
+    sinf(subnormal) raises inexact/underflow
+    sinf(min_normalized) raises inexact/underflow
+    sinf(normalized) raises inexact. */
+
+ lfd fp2,(L(small)-L(anchor))(r9)
+
+ fmul fp2,fp1,fp2 /* x * small */
+ fsub fp1,fp1,fp2 /* x - x * small */
+
+ frsp fp1,fp1
+
+ blr
+
+ .balign 16
+L(zero):
+ blr
+
+END (__sinf)
+
+ .section .rodata, "a"
+
+ .balign 8
+
+L(anchor):
+
+ /* Chebyshev constants for sin, range -PI/4 - PI/4. */
+L(S0): .8byte 0xbfc5555555551cd9
+L(S1): .8byte 0x3f81111110c2688b
+L(S2): .8byte 0xbf2a019f8b4bd1f9
+L(S3): .8byte 0x3ec71d7264e6b5b4
+L(S4): .8byte 0xbe5a947e1674b58a
+
+ /* Chebyshev constants for sin, range 2^-27 - 2^-5. */
+L(SS0): .8byte 0xbfc555555543d49d
+L(SS1): .8byte 0x3f8110f475cec8c5
+
+ /* Chebyshev constants for cos, range -PI/4 - PI/4. */
+L(C0): .8byte 0xbfdffffffffe98ae
+L(C1): .8byte 0x3fa55555545c50c7
+L(C2): .8byte 0xbf56c16b348b6874
+L(C3): .8byte 0x3efa00eb9ac43cc0
+L(C4): .8byte 0xbe923c97dd8844d7
+
+L(invpio2):
+ .8byte 0x3fe45f306dc9c883 /* 2/PI */
+
+L(invpio4):
+ .8byte 0x3ff45f306dc9c883 /* 4/PI */
+
+L(invpio4_table):
+ .8byte 0x0000000000000000
+ .8byte 0x3ff45f306c000000
+ .8byte 0x3e3c9c882a000000
+ .8byte 0x3c54fe13a8000000
+ .8byte 0x3aaf47d4d0000000
+ .8byte 0x38fbb81b6c000000
+ .8byte 0x3714acc9e0000000
+ .8byte 0x3560e4107c000000
+ .8byte 0x33bca2c756000000
+ .8byte 0x31fbd778ac000000
+ .8byte 0x300b7246e0000000
+ .8byte 0x2e5d2126e8000000
+ .8byte 0x2c97003248000000
+ .8byte 0x2ad77504e8000000
+ .8byte 0x290921cfe0000000
+ .8byte 0x274deb1cb0000000
+ .8byte 0x25829a73e0000000
+ .8byte 0x23fd1046be000000
+ .8byte 0x2224baed10000000
+ .8byte 0x20709d338e000000
+ .8byte 0x1e535a2f80000000
+ .8byte 0x1cef904e64000000
+ .8byte 0x1b0d639830000000
+ .8byte 0x1964ce7d24000000
+ .8byte 0x17b908bf16000000
+
+L(pio4):
+ .8byte 0x3fe921fb54442d18 /* PI/4 */
+
+/* PI/2 as a sum of two doubles.
+   We only use 32 bits of the upper limb to avoid losing significant
+   bits when multiplying with up to (2^22)/(pi/2). */
+L(pio2hi):
+ .8byte 0xbff921fb54400000
+
+L(pio2lo):
+ .8byte 0xbdd0b4611a626332
+
+L(pio2_table):
+ .8byte 0
+ .8byte 0x3ff921fb54442d18 /* 1 * PI/2 */
+ .8byte 0x400921fb54442d18 /* 2 * PI/2 */
+ .8byte 0x4012d97c7f3321d2 /* 3 * PI/2 */
+ .8byte 0x401921fb54442d18 /* 4 * PI/2 */
+ .8byte 0x401f6a7a2955385e /* 5 * PI/2 */
+ .8byte 0x4022d97c7f3321d2 /* 6 * PI/2 */
+ .8byte 0x4025fdbbe9bba775 /* 7 * PI/2 */
+ .8byte 0x402921fb54442d18 /* 8 * PI/2 */
+ .8byte 0x402c463abeccb2bb /* 9 * PI/2 */
+ .8byte 0x402f6a7a2955385e /* 10 * PI/2 */
+
+L(small):
+ .8byte 0x3cd0000000000000 /* 2^-50 */
+
+L(ones):
+ .8byte 0x3ff0000000000000 /* +1.0 */
+ .8byte 0xbff0000000000000 /* -1.0 */
+
+L(DPhalf):
+ .8byte 0x3fe0000000000000 /* 0.5 */
+
+L(DPone):
+ .8byte 0x3ff0000000000000 /* 1.0 */
+
+L(DPtwo):
+ .8byte 0x4000000000000000 /* 2.0 */
+
+weak_alias(__sinf, sinf)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memcmp.S
new file mode 100644
index 0000000000..46b9c0067a
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memcmp.S
@@ -0,0 +1,1447 @@
+/* Optimized memcmp implementation for POWER7/PowerPC64.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* int [r3] memcmp (const char *s1 [r3],
+    const char *s2 [r4],
+    size_t size [r5]) */
+
+/* TODO: change these to the actual instructions when the minimum required
+   binutils allows it. */
+#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
+#ifndef MEMCMP
+# define MEMCMP memcmp
+#endif
+ .machine power7
+EALIGN (MEMCMP, 4, 0)
+ CALL_MCOUNT 3
+
+#define rRTN r3
+#define rSTR1 r3 /* First string arg. */
+#define rSTR2 r4 /* Second string arg. */
+#define rN r5 /* Max string length. */
+#define rWORD1 r6 /* Current word in s1. */
+#define rWORD2 r7 /* Current word in s2. */
+#define rWORD3 r8 /* Next word in s1. */
+#define rWORD4 r9 /* Next word in s2. */
+#define rWORD5 r10 /* Next word in s1. */
+#define rWORD6 r11 /* Next word in s2. */
+
+#define rOFF8 r20 /* 8 bytes offset. */
+#define rOFF16 r21 /* 16 bytes offset. */
+#define rOFF24 r22 /* 24 bytes offset. */
+#define rOFF32 r23 /* 32 bytes offset. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rSHR r28 /* Unaligned shift right count. */
+#define rSHL r29 /* Unaligned shift left count. */
+#define rWORD7 r30 /* Next word in s1. */
+#define rWORD8 r31 /* Next word in s2.
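   For reference while reading the word and vector paths below, the
   semantics being implemented are those of byte-wise comparison on
   unsigned chars; only the sign of the C return value is preserved by
   the assembly:

     #include <stddef.h>

     int memcmp_ref (const unsigned char *a, const unsigned char *b,
                     size_t n)
     {
       for (size_t i = 0; i < n; i++)
         if (a[i] != b[i])
           return a[i] < b[i] ? -1 : 1;
       return 0;
     }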
*/ + +#define rWORD8SAVE (-8) +#define rWORD7SAVE (-16) +#define rOFF8SAVE (-24) +#define rOFF16SAVE (-32) +#define rOFF24SAVE (-40) +#define rOFF32SAVE (-48) +#define rSHRSAVE (-56) +#define rSHLSAVE (-64) +#define rWORD8SHIFTSAVE (-72) +#define rWORD2SHIFTSAVE (-80) +#define rWORD4SHIFTSAVE (-88) +#define rWORD6SHIFTSAVE (-96) + +#ifdef __LITTLE_ENDIAN__ +# define LD ldbrx +#else +# define LD ldx +#endif + + xor r10, rSTR2, rSTR1 + cmpldi cr6, rN, 0 + cmpldi cr1, rN, 8 + clrldi. r0, r10, 61 + clrldi r12, rSTR1, 61 + cmpldi cr5, r12, 0 + beq- cr6, L(zeroLength) + dcbt 0, rSTR1 + dcbt 0, rSTR2 + /* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) + bne L(unalignedqw) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then we are already double word + aligned and can perform the DW aligned loop. */ + + .align 4 +L(samealignment): + or r11, rSTR2, rSTR1 + clrldi. r11, r11, 60 + beq L(qw_align) + /* Try to align to QW else proceed to DW loop. */ + clrldi. r10, r10, 60 + bne L(DW) + /* For the difference to reach QW alignment, load as DW. */ + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + subfic r10, r12, 8 + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + sldi r9, r10, 3 + subfic r9, r9, 64 + sld rWORD1, rWORD1, r9 + sld rWORD2, rWORD2, r9 + cmpld cr6, rWORD1, rWORD2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(ret_diff) + subf rN, r10, rN + + cmpld cr6, r11, r12 + bgt cr6, L(qw_align) + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpld cr6, rWORD1, rWORD2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(different) + cmpldi cr6, rN, 8 + ble cr6, L(zeroLength) + addi rN, rN, -8 + /* Now both rSTR1 and rSTR2 are aligned to QW. */ + .align 4 +L(qw_align): + vspltisb v0, 0 + srdi. r6, rN, 6 + li r8, 16 + li r10, 32 + li r11, 48 + ble cr0, L(lessthan64) + mtctr r6 + vspltisb v8, 0 + vspltisb v6, 0 + /* Aligned vector loop. */ + .align 4 +L(aligned_loop): + lvx v4, 0, rSTR1 + lvx v5, 0, rSTR2 + vcmpequb. v7, v6, v8 + bnl cr6, L(different3) + lvx v6, rSTR1, r8 + lvx v8, rSTR2, r8 + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + lvx v4, rSTR1, r10 + lvx v5, rSTR2, r10 + vcmpequb. v7, v6, v8 + bnl cr6, L(different3) + lvx v6, rSTR1, r11 + lvx v8, rSTR2, r11 + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + addi rSTR1, rSTR1, 64 + addi rSTR2, rSTR2, 64 + bdnz L(aligned_loop) + vcmpequb. v7, v6, v8 + bnl cr6, L(different3) + clrldi rN, rN, 58 + /* Handle remainder for aligned loop. */ + .align 4 +L(lessthan64): + mr r9, rSTR1 + cmpdi cr6, rN, 0 + li rSTR1, 0 + blelr cr6 + lvx v4, 0, r9 + lvx v5, 0, rSTR2 + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r8 + lvx v5, rSTR2, r8 + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r10 + lvx v5, rSTR2, r10 + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r11 + lvx v5, rSTR2, r11 + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + blr + + /* Calculate and return the difference. */ + .align 4 +L(different1): + cmpdi cr6, rN, 16 + bge cr6, L(different2) + /* Discard unwanted bytes. 
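   The lvsr/lvsl plus vperm sequence below is byte masking: only the
   first rN bytes of the final 16-byte chunk may influence the result.
   A scalar model (tail_equal is an invented name):

     // Bytes past rn are forced to zero on both sides, like vperm
     // against v0 = 0, before the equality check.
     int tail_equal (const unsigned char *a, const unsigned char *b,
                     unsigned rn)   // rn < 16
     {
       for (unsigned i = 0; i < 16; i++)
         {
           unsigned char x = i < rn ? a[i] : 0;
           unsigned char y = i < rn ? b[i] : 0;
           if (x != y)
             return 0;
         }
       return 1;
     }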
*/ +#ifdef __LITTLE_ENDIAN__ + lvsr v1, 0, rN + vperm v4, v4, v0, v1 + vperm v5, v5, v0, v1 +#else + lvsl v1, 0, rN + vperm v4, v0, v4, v1 + vperm v5, v0, v5, v1 +#endif + vcmpequb. v7, v4, v5 + li rRTN, 0 + bltlr cr6 + .align 4 +L(different2): +#ifdef __LITTLE_ENDIAN__ + /* Reverse bytes for direct comparison. */ + lvsl v10, r0, r0 + vspltisb v8, 15 + vsububm v9, v8, v10 + vperm v4, v4, v0, v9 + vperm v5, v5, v0, v9 +#endif + MFVRD(r7, v4) + MFVRD(r9, v5) + cmpld cr6, r7, r9 + bne cr6, L(ret_diff) + /* Difference in second DW. */ + vsldoi v4, v4, v4, 8 + vsldoi v5, v5, v5, 8 + MFVRD(r7, v4) + MFVRD(r9, v5) + cmpld cr6, r7, r9 +L(ret_diff): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + .align 4 +L(different3): +#ifdef __LITTLE_ENDIAN__ + /* Reverse bytes for direct comparison. */ + vspltisb v9, 15 + lvsl v10, r0, r0 + vsububm v9, v9, v10 + vperm v6, v6, v0, v9 + vperm v8, v8, v0, v9 +#endif + MFVRD(r7, v6) + MFVRD(r9, v8) + cmpld cr6, r7, r9 + bne cr6, L(ret_diff) + /* Difference in second DW. */ + vsldoi v6, v6, v6, 8 + vsldoi v8, v8, v8, 8 + MFVRD(r7, v6) + MFVRD(r9, v8) + cmpld cr6, r7, r9 + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + + .align 4 +L(different): + cmpldi cr7, rN, 8 + bgt cr7, L(end) + /* Skip unwanted bytes. */ + sldi r8, rN, 3 + subfic r8, r8, 64 + srd rWORD1, rWORD1, r8 + srd rWORD2, rWORD2, r8 + cmpld cr6, rWORD1, rWORD2 + li rRTN, 0 + beqlr cr6 +L(end): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + + .align 4 +L(unalignedqw): + /* Proceed to DW unaligned loop,if there is a chance of pagecross. */ + rldicl r9, rSTR1, 0, 52 + add r9, r9, rN + cmpldi cr0, r9, 4096-16 + bgt cr0, L(unaligned) + rldicl r9, rSTR2, 0, 52 + add r9, r9, rN + cmpldi cr0, r9, 4096-16 + bgt cr0, L(unaligned) + li r0, 0 + li r8, 16 + vspltisb v0, 0 + /* Check if rSTR1 is aligned to QW. */ + andi. r11, rSTR1, 0xF + beq L(s1_align) + + /* Compare 16B and align S1 to QW. */ +#ifdef __LITTLE_ENDIAN__ + lvsr v10, 0, rSTR1 /* Compute mask. */ + lvsr v6, 0, rSTR2 /* Compute mask. */ +#else + lvsl v10, 0, rSTR1 /* Compute mask. */ + lvsl v6, 0, rSTR2 /* Compute mask. */ +#endif + lvx v5, 0, rSTR2 + lvx v9, rSTR2, r8 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v9, v5, v6 +#else + vperm v5, v5, v9, v6 +#endif + lvx v4, 0, rSTR1 + lvx v9, rSTR1, r8 +#ifdef __LITTLE_ENDIAN__ + vperm v4, v9, v4, v10 +#else + vperm v4, v4, v9, v10 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + cmpldi cr6, rN, 16 + ble cr6, L(zeroLength) + subfic r11, r11, 16 + subf rN, r11, rN + add rSTR1, rSTR1, r11 + add rSTR2, rSTR2, r11 + + /* As s1 is QW aligned prepare for unaligned loop. */ + .align 4 +L(s1_align): +#ifdef __LITTLE_ENDIAN__ + lvsr v6, 0, rSTR2 +#else + lvsl v6, 0, rSTR2 +#endif + lvx v5, 0, rSTR2 + srdi. r6, rN, 6 + li r10, 32 + li r11, 48 + ble cr0, L(lessthan64_unalign) + mtctr r6 + li r9, 64 + /* Unaligned vector loop. */ + .align 4 +L(unalign_qwloop): + lvx v4, 0, rSTR1 + lvx v10, rSTR2, r8 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + vor v5, v10, v10 + lvx v4, rSTR1, r8 + lvx v10, rSTR2, r10 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + vor v5, v10, v10 + lvx v4, rSTR1, r10 + lvx v10, rSTR2, r11 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. 
v7, v5, v4 + bnl cr6, L(different2) + vor v5, v10, v10 + lvx v4, rSTR1, r11 + lvx v10, rSTR2, r9 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + vor v5, v10, v10 + addi rSTR1, rSTR1, 64 + addi rSTR2, rSTR2, 64 + bdnz L(unalign_qwloop) + clrldi rN, rN, 58 + /* Handle remainder for unaligned loop. */ + .align 4 +L(lessthan64_unalign): + mr r9, rSTR1 + cmpdi cr6, rN, 0 + li rSTR1, 0 + blelr cr6 + lvx v4, 0, r9 + lvx v10, rSTR2, r8 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + vor v5, v10, v10 + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r8 + lvx v10, rSTR2, r10 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + vor v5, v10, v10 + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r10 + lvx v10, rSTR2, r11 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + vor v5, v10, v10 + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r11 + addi r11, r11, 16 + lvx v10, rSTR2, r11 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + blr + +/* Otherwise we know the two strings have the same alignment (but not + yet DW). So we force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DW aligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ + .align 4 +L(DW): + std rWORD8, rWORD8SAVE(r1) + std rWORD7, rWORD7SAVE(r1) + std rOFF8, rOFF8SAVE(r1) + std rOFF16, rOFF16SAVE(r1) + std rOFF24, rOFF24SAVE(r1) + std rOFF32, rOFF32SAVE(r1) + cfi_offset(rWORD8, rWORD8SAVE) + cfi_offset(rWORD7, rWORD7SAVE) + cfi_offset(rOFF8, rOFF8SAVE) + cfi_offset(rOFF16, rOFF16SAVE) + cfi_offset(rOFF24, rOFF24SAVE) + cfi_offset(rOFF32, rOFF32SAVE) + + li rOFF8,8 + li rOFF16,16 + li rOFF24,24 + li rOFF32,32 + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + beq cr5, L(DWaligned) + add rN, rN, r12 + sldi rWORD6, r12, 3 + srdi r0, rN, 5 /* Divide by 32. */ + andi. r12, rN, 24 /* Get the DW remainder. */ + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dPs4) + mtctr r0 + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + +/* Remainder is 8. */ + .align 3 +L(dsP1): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 + b L(dP1e) +/* Remainder is 16. */ + .align 4 +L(dPs2): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. */ + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 + b L(dP2e) +/* Remainder is 24. 
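   The first-doubleword fixup described above has a compact scalar
   reading: round both pointers down to a doubleword boundary, then
   shift the loaded word left so the bytes before the true start fall
   off.  A model in big-endian byte order (the little-endian build sees
   the same view through ldbrx):

     #include <stdint.h>

     uint64_t first_dw (uint64_t dw, unsigned misalign)  // 0..7
     {
       return dw << (misalign * 8);  // sld with rWORD6 = r12 * 8
     }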
*/ + .align 4 +L(dPs3): + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD2, rWORD6 + cmpld cr1, rWORD3, rWORD4 + b L(dP3e) +/* Count is a multiple of 32, remainder is 0. */ + .align 4 +L(dPs4): + mtctr r0 + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD2, rWORD6 + cmpld cr7, rWORD1, rWORD2 + b L(dP4e) + +/* At this point we know both strings are double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWaligned): + andi. r12, rN, 24 /* Get the DW remainder. */ + srdi r0, rN, 5 /* Divide by 32. */ + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) + +/* Remainder is 8. */ + .align 4 +L(dP1): + mtctr r0 +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ + LD rWORD5, 0, rSTR1 + LD rWORD6, 0, rSTR2 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 +L(dP1e): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr1, rWORD3, rWORD4 + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5x) + bne cr7, L(dLcr7x) + + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + bne cr1, L(dLcr1) + cmpld cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + .align 3 +L(dP1x): + sldi. r12, rN, 3 + bne cr5, L(dLcr5x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Remainder is 16. */ + .align 4 +L(dP2): + mtctr r0 + LD rWORD5, 0, rSTR1 + LD rWORD6, 0, rSTR2 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 +L(dP2e): + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + LD rWORD3, rOFF24, rSTR1 + LD rWORD4, rOFF24, rSTR2 + cmpld cr1, rWORD3, rWORD4 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) + .align 4 +L(dP2x): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + sldi. r12, rN, 3 + bne cr6, L(dLcr6x) + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr1, L(dLcr1x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Remainder is 24. */ + .align 4 +L(dP3): + mtctr r0 + LD rWORD3, 0, rSTR1 + LD rWORD4, 0, rSTR2 + cmpld cr1, rWORD3, rWORD4 +L(dP3e): + LD rWORD5, rOFF8, rSTR1 + LD rWORD6, rOFF8, rSTR2 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) + LD rWORD7, rOFF16, rSTR1 + LD rWORD8, rOFF16, rSTR2 + cmpld cr5, rWORD7, rWORD8 + LD rWORD1, rOFF24, rSTR1 + LD rWORD2, rOFF24, rSTR2 + cmpld cr7, rWORD1, rWORD2 + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) +/* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. 
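The dP1/dP2/dP3/dP4 dispatch above is loop versioning: srdi r0, rN, 5 puts the 32-byte block count in the CTR while andi. r12, rN, 24 selects an entry point that pre-compares the odd doublewords, so the main loop always sees whole 32-byte groups. A C sketch of that bookkeeping, covering whole doublewords only (the 1-7 byte tail and the register rotation are left out, and the names are illustrative):

#include <stddef.h>
#include <stdint.h>

static inline int
dw_cmp (uint64_t a, uint64_t b)     /* one cmpld, as a value */
{
  return a == b ? 0 : a > b ? 1 : -1;
}

static int
compare_aligned_dws (const uint64_t *s1, const uint64_t *s2, size_t n)
{
  size_t blocks = n >> 5;           /* srdi r0, rN, 5 */
  size_t rem_dw = (n & 24) >> 3;    /* andi. r12, rN, 24 */
  int r;
  while (rem_dw--)                  /* the dP1/dP2/dP3 entry points */
    if ((r = dw_cmp (*s1++, *s2++)) != 0)
      return r;
  while (blocks--)                  /* the 4-way unrolled dLoop */
    for (int i = 0; i < 4; i++)
      if ((r = dw_cmp (*s1++, *s2++)) != 0)
        return r;
  return 0;
}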
*/ + .align 4 +L(dP3x): + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + sldi. r12, rN, 3 + bne cr1, L(dLcr1x) + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + bne cr6, L(dLcr6x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne cr7, L(dLcr7x) + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Count is a multiple of 32, remainder is 0. */ + .align 4 +L(dP4): + mtctr r0 + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpld cr7, rWORD1, rWORD2 +L(dP4e): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + LD rWORD5, rOFF16, rSTR1 + LD rWORD6, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + LD rWORD7, rOFF24, rSTR1 + LD rWORD8, rOFF24, rSTR2 + addi rSTR1, rSTR1, 24 + addi rSTR2, rSTR2, 24 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4. */ +/* This is the primary loop. */ + .align 4 +L(dLoop): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +L(dLoop1): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) +L(dLoop2): + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) +L(dLoop3): + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + bne cr1, L(dLcr1) + cmpld cr7, rWORD1, rWORD2 + bdnz L(dLoop) + +L(dL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + cmpld cr5, rWORD7, rWORD8 +L(d44): + bne cr7, L(dLcr7) +L(d34): + bne cr1, L(dLcr1) +L(d24): + bne cr6, L(dLcr6) +L(d14): + sldi. r12, rN, 3 + bne cr5, L(dLcr5) +L(d04): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + beq L(duzeroLength) +/* At this point we have a remainder of 1 to 7 bytes to compare. Since + we are aligned it is safe to load the whole double word, and use + shift right double to eliminate bits beyond the compare length. */ +L(d00): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + cmpld cr7, rWORD1, rWORD2 + bne cr7, L(dLcr7x) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + + .align 4 +L(dLcr7): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr7x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(dLcr1): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr1x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr + .align 4 +L(dLcr6): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr6x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + .align 4 +L(dLcr5): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr5x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr5 + li rRTN, -1 + blr + + .align 4 +L(bytealigned): + mtctr rN + +/* We need to prime this loop. 
This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) are 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. Effectively + the first iteration of the loop only serves to load operands, and + branches based on compares are delayed until the next iteration. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. */ + + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz L(b11) + cmpld cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz L(b12) + cmpld cr1, rWORD3, rWORD4 + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz L(b13) + .align 4 +L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + bne cr7, L(bLcr7) + + cmpld cr6, rWORD5, rWORD6 + bdz L(b3i) + + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + bne cr1, L(bLcr1) + + cmpld cr7, rWORD1, rWORD2 + bdz L(b2i) + + lbzu rWORD5, 1(rSTR1) + lbzu rWORD6, 1(rSTR2) + bne cr6, L(bLcr6) + + cmpld cr1, rWORD3, rWORD4 + bdnz L(bLoop) + +/* We speculatively load bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. In that + case the loop will exit early (before all the pending bytes are + tested), so we must complete the pending operations + before returning. */ +L(b1i): + bne cr7, L(bLcr7) + bne cr1, L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne cr6, L(bLcr6) + bne cr7, L(bLcr7) + b L(bx34) + .align 4 +L(b3i): + bne cr1, L(bLcr1) + bne cr6, L(bLcr6) + b L(bx12) + .align 4 +L(bLcr7): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr +L(bLcr1): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr +L(bLcr6): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + +L(b13): + bne cr7, L(bx12) + bne cr1, L(bx34) +L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop +L(b12): + bne cr7, L(bx12) +L(bx34): + sub rRTN, rWORD3, rWORD4 + blr +L(b11): +L(bx12): + sub rRTN, rWORD1, rWORD2 + blr + + .align 4 +L(zeroLength): + li rRTN, 0 + blr + + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then rSTR1 is double word + aligned and we can use the DWunaligned loop. + + Otherwise we know that rSTR1 is not yet DW aligned. + So we can force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DWaligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair.
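Stripped of the three-stage pipelining, L(bLoop) above computes nothing more than the classic byte-wise memcmp below; the CTR carries the length, so the speculative lbzu loads can never run past it. A plain C statement of those semantics:

#include <stddef.h>

static int
byte_compare (const unsigned char *s1, const unsigned char *s2, size_t n)
{
  for (size_t i = 0; i < n; i++)          /* mtctr rN ... bdnz L(bLoop) */
    if (s1[i] != s2[i])
      return (int) s1[i] - (int) s2[i];   /* sub rRTN, rWORDx, rWORDy */
  return 0;
}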
*/ +L(unaligned): + std rWORD8, rWORD8SAVE(r1) + std rWORD7, rWORD7SAVE(r1) + std rOFF8, rOFF8SAVE(r1) + std rOFF16, rOFF16SAVE(r1) + std rOFF24, rOFF24SAVE(r1) + std rOFF32, rOFF32SAVE(r1) + cfi_offset(rWORD8, rWORD8SAVE) + cfi_offset(rWORD7, rWORD7SAVE) + cfi_offset(rOFF8, rOFF8SAVE) + cfi_offset(rOFF16, rOFF16SAVE) + cfi_offset(rOFF24, rOFF24SAVE) + cfi_offset(rOFF32, rOFF32SAVE) + li rOFF8,8 + li rOFF16,16 + li rOFF24,24 + li rOFF32,32 + std rSHL, rSHLSAVE(r1) + cfi_offset(rSHL, rSHLSAVE) + clrldi rSHL, rSTR2, 61 + beq cr6, L(duzeroLength) + std rSHR, rSHRSAVE(r1) + cfi_offset(rSHR, rSHRSAVE) + beq cr5, L(DWunaligned) + std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 DW. */ + sub rWORD8_SHIFT, rSTR2, r12 +/* But do not attempt to address the DW before that DW that contains + the actual start of rSTR2. */ + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (DW aligned) start of rSTR1. */ + clrldi rSHL, rWORD8_SHIFT, 61 + clrrdi rSTR1, rSTR1, 3 + std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + sldi rSHL, rSHL, 3 + cmpld cr5, rWORD8_SHIFT, rSTR2 + add rN, rN, r12 + sldi rWORD6, r12, 3 + std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE) + cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE) + cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE) + subfic rSHR, rSHL, 64 + srdi r0, rN, 5 /* Divide by 32. */ + andi. r12, rN, 24 /* Get the DW remainder. */ +/* We normally need to load 2 DWs to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a DW where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. */ + li rWORD8, 0 + blt cr5, L(dus0) + LD rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 8 + sld rWORD8, rWORD8, rSHL + +L(dus0): + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + srd r12, rWORD2, rSHR + clrldi rN, rN, 61 + beq L(duPs4) + mtctr r0 + or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + +/* Remainder is 8. */ + .align 4 +L(dusP1): + sld rWORD8_SHIFT, rWORD2, rSHL + sld rWORD7, rWORD1, rWORD6 + sld rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16. */ + .align 4 +L(duPs2): + sld rWORD6_SHIFT, rWORD2, rSHL + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD8, rWORD6 + b L(duP2e) +/* Remainder is 24. */ + .align 4 +L(duPs3): + sld rWORD4_SHIFT, rWORD2, rSHL + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD8, rWORD6 + b L(duP3e) +/* Count is a multiple of 32, remainder is 0. */ + .align 4 +L(duPs4): + mtctr r0 + or rWORD8, r12, rWORD8 + sld rWORD2_SHIFT, rWORD2, rSHL + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD8, rWORD6 + b L(duP4e) + +/* At this point we know rSTR1 is double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWunaligned): + std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + srdi r0, rN, 5 /* Divide by 32. 
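The rSHL/rSHR setup above is the usual shift-and-merge scheme for the misaligned string: round rSTR2 down to a doubleword boundary and rebuild each logical doubleword from two aligned loads. A C sketch under the assumption that loads deliver bytes in big-endian significance order (what the LD macro provides); gather_unaligned is an illustrative helper, and unlike the asm it takes no care near page boundaries:

#include <stddef.h>
#include <stdint.h>

static void
gather_unaligned (const unsigned char *p, uint64_t *out, size_t ndw)
{
  unsigned int shl = ((uintptr_t) p & 7) * 8;  /* clrldi; sldi rSHL, rSHL, 3 */
  const uint64_t *q = (const uint64_t *) ((uintptr_t) p & ~(uintptr_t) 7);
  if (shl == 0)              /* mutually aligned strings take the DW path */
    {
      for (size_t i = 0; i < ndw; i++)
        out[i] = q[i];
      return;
    }
  unsigned int shr = 64 - shl;                 /* subfic rSHR, rSHL, 64 */
  uint64_t prev = *q++;      /* first aligned doubleword, partly unwanted */
  for (size_t i = 0; i < ndw; i++)
    {
      uint64_t next = *q++;  /* NB: reads past the last byte; the asm avoids
                                doing this across a page boundary */
      out[i] = (prev << shl) | (next >> shr);
      prev = next;
    }
}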
*/ + std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + andi. r12, rN, 24 /* Get the DW remainder. */ + std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE) + cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE) + cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE) + cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE) + sldi rSHL, rSHL, 3 + LD rWORD6, 0, rSTR2 + LD rWORD8, rOFF8, rSTR2 + addi rSTR2, rSTR2, 8 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + subfic rSHR, rSHL, 64 + sld rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) + mtctr r0 + bgt cr1, L(duP3) + beq cr1, L(duP2) + +/* Remainder is 8. */ + .align 4 +L(duP1): + srd r12, rWORD8, rSHR + LD rWORD7, 0, rSTR1 + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) +L(duP1e): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) + or rWORD4, r12, rWORD2_SHIFT + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + bne cr7, L(duLcr7) + or rWORD6, r0, rWORD4_SHIFT + cmpld cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16. */ + .align 4 +L(duP2): + srd r0, rWORD8, rSHR + LD rWORD5, 0, rSTR1 + or rWORD6, r0, rWORD6_SHIFT + sld rWORD6_SHIFT, rWORD8, rSHL +L(duP2e): + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr6, rWORD5, rWORD6 + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + LD rWORD3, rOFF24, rSTR1 + LD rWORD4, rOFF24, rSTR2 + cmpld cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + cmpld cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmpld cr5, rWORD7, rWORD8 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Remainder is 24. 
*/ + .align 4 +L(duP3): + srd r12, rWORD8, rSHR + LD rWORD3, 0, rSTR1 + sld rWORD4_SHIFT, rWORD8, rSHL + or rWORD4, r12, rWORD6_SHIFT +L(duP3e): + LD rWORD5, rOFF8, rSTR1 + LD rWORD6, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT + LD rWORD7, rOFF16, rSTR1 + LD rWORD8, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) + LD rWORD1, rOFF24, rSTR1 + LD rWORD2, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + cmpld cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Count is a multiple of 32, remainder is 0. */ + .align 4 +L(duP4): + mtctr r0 + srd r0, rWORD8, rSHR + LD rWORD1, 0, rSTR1 + sld rWORD2_SHIFT, rWORD8, rSHL + or rWORD2, r0, rWORD6_SHIFT +L(duP4e): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT + LD rWORD5, rOFF16, rSTR1 + LD rWORD6, rOFF16, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT + LD rWORD7, rOFF24, rSTR1 + LD rWORD8, rOFF24, rSTR2 + addi rSTR1, rSTR1, 24 + addi rSTR2, rSTR2, 24 + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + cmpld cr5, rWORD7, rWORD8 + bdz L(du24) /* Adjust CTR as we start with +4. */ +/* This is the primary loop. */ + .align 4 +L(duLoop): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +L(duLoop1): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +L(duLoop2): + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +L(duLoop3): + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + cmpld cr7, rWORD1, rWORD2 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + bdnz L(duLoop) + +L(duL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmpld cr5, rWORD7, rWORD8 +L(du44): + bne cr7, L(duLcr7) +L(du34): + bne cr1, L(duLcr1) +L(du24): + bne cr6, L(duLcr6) +L(du14): + sldi. rN, rN, 3 + bne cr5, L(duLcr5) +/* At this point we have a remainder of 1 to 7 bytes to compare. We use + shift right double to eliminate bits beyond the compare length. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). 
If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rWORD8_SHIFT). */ + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + .align 4 +L(dutrim): + LD rWORD1, rOFF8, rSTR1 + ld rWORD8, -8(r1) + subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */ + or rWORD2, r0, rWORD8_SHIFT + ld rWORD7, rWORD7SAVE(r1) + ld rSHL, rSHLSAVE(r1) + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + ld rSHR, rSHRSAVE(r1) + ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + li rRTN, 0 + cmpld cr7, rWORD1, rWORD2 + ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + beq cr7, L(dureturn24) + li rRTN, 1 + ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(duLcr7): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr7, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr1): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr6): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr5): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + + .align 3 +L(duZeroReturn): + li rRTN, 0 + .align 4 +L(dureturn): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dureturn29): + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) +L(dureturn27): + ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) +L(dureturn24): + ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + blr + +L(duzeroLength): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +END (MEMCMP) +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp, bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memset.S new file mode 100644 index 0000000000..bc734c9f4f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memset.S @@ -0,0 +1,458 @@ +/* Optimized memset implementation for PowerPC64/POWER8. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
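The tail test described above (cmpld cr7, rN, rSHR followed by the ble into L(dutrim)) is what keeps the final load safe: when the remaining bit count fits inside the carried-over rWORD8_SHIFT bits, loading another doubleword could only touch an unmapped page for nothing. A small C model of that decision; the parameter names are illustrative:

#include <stdint.h>

static uint64_t
final_word (uint64_t carried_hi, const uint64_t *next_aligned,
            unsigned int rem_bits, unsigned int shr)
{
  uint64_t lo = 0;
  if (rem_bits > shr)            /* only then are unseen bits needed */
    lo = *next_aligned >> shr;   /* LD rWORD2, ...; srd r0, rWORD2, rSHR */
  return carried_hi | lo;        /* or rWORD2, r0, rWORD8_SHIFT */
}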
*/ + +#include <sysdep.h> + +#define MTVSRD_V1_R4 .long 0x7c240166 /* mtvsrd v1,r4 */ + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + + /* No need to use .machine power8 since mtvsrd is already + handled by the define. It avoid breakage on binutils + that does not support this machine specifier. */ + .machine power7 +EALIGN (MEMSET, 5, 0) + CALL_MCOUNT 3 + +L(_memset): + cmpldi cr7,r5,31 + neg r0,r3 + mr r10,r3 + + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 /* Replicate byte to word. */ + ble cr7,L(write_LT_32) + + andi. r11,r10,15 /* Check alignment of DST. */ + insrdi r4,r4,32,0 /* Replicate word to double word. */ + + beq L(big_aligned) + + mtocrf 0x01,r0 + clrldi r0,r0,60 + + /* Get DST aligned to 16 bytes. */ +1: bf 31,2f + stb r4,0(r10) + addi r10,r10,1 + +2: bf 30,4f + sth r4,0(r10) + addi r10,r10,2 + +4: bf 29,8f + stw r4,0(r10) + addi r10,r10,4 + +8: bf 28,16f + std r4,0(r10) + addi r10,r10,8 + +16: subf r5,r0,r5 + + .align 4 +L(big_aligned): + /* For sizes larger than 255 two possible paths: + - if constant is '0', zero full cache lines with dcbz + - otherwise uses vector instructions. */ + cmpldi cr5,r5,255 + dcbtst 0,r10 + cmpldi cr6,r4,0 + crand 27,26,21 + bt 27,L(huge_dcbz) + bge cr5,L(huge_vector) + + + /* Size between 32 and 255 bytes with constant different than 0, use + doubleword store instruction to achieve best throughput. */ + srdi r8,r5,5 + clrldi r11,r5,59 + cmpldi cr6,r11,0 + cmpdi r8,0 + beq L(tail_bytes) + mtctr r8 + + /* Main aligned write loop, writes 32-bytes at a time. */ + .align 4 +L(big_loop): + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + addi r10,r10,32 + bdz L(tail_bytes) + + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + addi r10,10,32 + bdnz L(big_loop) + + b L(tail_bytes) + + /* Write remaining 1~31 bytes. */ + .align 4 +L(tail_bytes): + beqlr cr6 + + srdi r7,r11,4 + clrldi r8,r11,60 + mtocrf 0x01,r7 + + .align 4 + bf 31,8f + std r4,0(r10) + std r4,8(r10) + addi r10,r10,16 + + .align 4 +8: mtocrf 0x1,r8 + bf 28,4f + std r4,0(r10) + addi r10,r10,8 + + .align 4 +4: bf 29,2f + stw 4,0(10) + addi 10,10,4 + + .align 4 +2: bf 30,1f + sth 4,0(10) + addi 10,10,2 + + .align 4 +1: bflr 31 + stb 4,0(10) + blr + + /* Size larger than 255 bytes with constant different than 0, use + vector instruction to achieve best throughput. */ +L(huge_vector): + /* Replicate set byte to quadword in VMX register. */ + MTVSRD_V1_R4 + xxpermdi 32,v0,v1,0 + vspltb v2,v0,15 + + /* Main aligned write loop: 128 bytes at a time. */ + li r6,16 + li r7,32 + li r8,48 + mtocrf 0x02,r5 + srdi r12,r5,7 + cmpdi r12,0 + beq L(aligned_tail) + mtctr r12 + b L(aligned_128loop) + + .align 4 +L(aligned_128loop): + stvx v2,0,r10 + stvx v2,r10,r6 + stvx v2,r10,r7 + stvx v2,r10,r8 + addi r10,r10,64 + stvx v2,0,r10 + stvx v2,r10,r6 + stvx v2,r10,r7 + stvx v2,r10,r8 + addi r10,r10,64 + bdnz L(aligned_128loop) + + /* Write remaining 1~127 bytes. */ +L(aligned_tail): + mtocrf 0x01,r5 + bf 25,32f + stvx v2,0,r10 + stvx v2,r10,r6 + stvx v2,r10,r7 + stvx v2,r10,r8 + addi r10,r10,64 + +32: bf 26,16f + stvx v2,0,r10 + stvx v2,r10,r6 + addi r10,r10,32 + +16: bf 27,8f + stvx v2,0,r10 + addi r10,r10,16 + +8: bf 28,4f + std r4,0(r10) + addi r10,r10,8 + + /* Copies 4~7 bytes. */ +4: bf 29,L(tail2) + stw r4,0(r10) + bf 30,L(tail5) + sth r4,4(r10) + bflr 31 + stb r4,6(r10) + /* Return original DST pointer. 
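A scalar sketch of the setup just shown: the three insrdi instructions replicate the fill byte across a doubleword, after which the main loop issues four std stores per iteration. This assumes an 8-byte-aligned destination and a multiple-of-32 length; the alignment run-in, the tail, and the dcbz/vector paths above are omitted:

#include <stddef.h>
#include <stdint.h>

static void
set32 (unsigned char *p, int c, size_t n)  /* p 8-byte aligned, n % 32 == 0 */
{
  uint64_t v = (unsigned char) c;
  v |= v << 8;               /* insrdi r4, r4, 8, 48: byte -> halfword */
  v |= v << 16;              /* insrdi r4, r4, 16, 32: halfword -> word */
  v |= v << 32;              /* insrdi r4, r4, 32, 0: word -> doubleword */
  for (uint64_t *q = (uint64_t *) p; n >= 32; n -= 32, q += 4)
    q[0] = q[1] = q[2] = q[3] = v;         /* std r4, 0/8/16/24(r10) */
}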
*/ + blr + + /* Special case when value is 0 and we have a long length to deal + with. Use dcbz to zero out a full cacheline of 128 bytes at a time. + Before using dcbz though, we need to get the destination 128-byte + aligned. */ + .align 4 +L(huge_dcbz): + andi. r11,r10,127 + neg r0,r10 + beq L(huge_dcbz_aligned) + + clrldi r0,r0,57 + subf r5,r0,r5 + srdi r0,r0,3 + mtocrf 0x01,r0 + + /* Write 1~128 bytes until DST is aligned to 128 bytes. */ +8: bf 28,4f + + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + std r4,32(r10) + std r4,40(r10) + std r4,48(r10) + std r4,56(r10) + addi r10,r10,64 + + .align 4 +4: bf 29,2f + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + addi r10,r10,32 + + .align 4 +2: bf 30,1f + std r4,0(r10) + std r4,8(r10) + addi r10,r10,16 + + .align 4 +1: bf 31,L(huge_dcbz_aligned) + std r4,0(r10) + addi r10,r10,8 + +L(huge_dcbz_aligned): + /* Setup dcbz unroll offsets and count numbers. */ + srdi r8,r5,9 + clrldi r11,r5,55 + cmpldi cr6,r11,0 + li r9,128 + cmpdi r8,0 + beq L(huge_tail) + li r7,256 + li r6,384 + mtctr r8 + + .align 4 +L(huge_loop): + /* Sets 512 bytes to zero in each iteration, the loop unrolling shows + a throughput boost for large sizes (2048 bytes or higher). */ + dcbz 0,r10 + dcbz r9,r10 + dcbz r7,r10 + dcbz r6,r10 + addi r10,r10,512 + bdnz L(huge_loop) + + beqlr cr6 + +L(huge_tail): + srdi r6,r11,8 + srdi r7,r11,4 + clrldi r8,r11,4 + cmpldi cr6,r8,0 + mtocrf 0x01,r6 + + beq cr6,L(tail) + + /* We have 1~511 bytes remaining. */ + .align 4 +32: bf 31,16f + dcbz 0,r10 + dcbz r9,r10 + addi r10,r10,256 + + .align 4 +16: mtocrf 0x01,r7 + bf 28,8f + dcbz 0,r10 + addi r10,r10,128 + + .align 4 +8: bf 29,4f + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + std r4,32(r10) + std r4,40(r10) + std r4,48(r10) + std r4,56(r10) + addi r10,r10,64 + + .align 4 +4: bf 30,2f + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + addi r10,r10,32 + + .align 4 +2: bf 31,L(tail) + std r4,0(r10) + std r4,8(r10) + addi r10,r10,16 + .align 4 + + /* Remaining 1~15 bytes. */ +L(tail): + mtocrf 0x01,r8 + + .align +8: bf 28,4f + std r4,0(r10) + addi r10,r10,8 + + .align 4 +4: bf 29,2f + stw r4,0(r10) + addi r10,r10,4 + + .align 4 +2: bf 30,1f + sth r4,0(r10) + addi r10,r10,2 + + .align 4 +1: bflr 31 + stb r4,0(r10) + blr + + /* Handle short copies of 0~31 bytes. Best throughput is achieved + by just unrolling all operations. */ + .align 4 +L(write_LT_32): + cmpldi cr6,5,8 + mtocrf 0x01,r5 + ble cr6,L(write_LE_8) + + /* At least 9 bytes to go. */ + neg r8,r4 + andi. r0,r8,3 + cmpldi cr1,r5,16 + beq L(write_LT_32_aligned) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,r0 + subf r5,r0,r5 + +2: bf 30,1f + sth r4,0(r10) + addi r10,r10,2 + +1: bf 31,L(end_4bytes_alignment) + stb r4,0(r10) + addi r10,r10,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,r5,16 + mtocrf 0x01,r5 + +L(write_LT_32_aligned): + blt cr1,8f + + stw r4,0(r10) + stw r4,4(r10) + stw r4,8(r10) + stw r4,12(r10) + addi r10,r10,16 + +8: bf 28,L(tail4) + stw r4,0(r10) + stw r4,4(r10) + addi r10,r10,8 + + .align 4 + /* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + stw r4,0(r10) + bf 30,L(tail5) + sth r4,4(r10) + bflr 31 + stb r4,6(r10) + blr + + .align 4 + /* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + sth r4,0(r10) + bflr 31 + stb r4,2(r10) + blr + + .align 4 +L(tail5): + bflr 31 + stb r4,4(r10) + blr + + .align 4 +1: bflr 31 + stb r4,0(r10) + blr + + /* Handles copies of 0~8 bytes. 
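A C model of the L(huge_dcbz) bookkeeping above: advance to a 128-byte boundary, then clear four cache lines (512 bytes) per iteration. dcbz cannot be expressed portably, so zero_line below is a stand-in, and memset models both the std run-in and the 1~511 byte tail; this path assumes c == 0 and n > 255, so the subtraction cannot underflow:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static inline void
zero_line (void *p)          /* stand-in for one dcbz (128-byte line) */
{
  memset (p, 0, 128);
}

static void
huge_zero (unsigned char *p, size_t n)
{
  size_t head = (size_t) (-(uintptr_t) p & 127);  /* neg r0,r10; clrldi */
  memset (p, 0, head);       /* models the std run-in above */
  p += head;
  n -= head;
  for (size_t i = n >> 9; i > 0; i--, p += 512)   /* srdi r8,r5,9; mtctr r8 */
    {
      zero_line (p);         /* dcbz 0,r10  */
      zero_line (p + 128);   /* dcbz r9,r10 */
      zero_line (p + 256);   /* dcbz r7,r10 */
      zero_line (p + 384);   /* dcbz r6,r10 */
    }
  memset (p, 0, n & 511);    /* models the 1~511 byte tail */
}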
*/ + .align 4 +L(write_LE_8): + bne cr6,L(tail4) + + stw r4,0(r10) + stw r4,4(r10) + blr +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. */ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END (__bzero) +#ifndef __bzero +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/multiarch/Implies new file mode 100644 index 0000000000..1fc7b7cd39 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpcpy.S new file mode 100644 index 0000000000..955e738cee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpcpy.S @@ -0,0 +1,24 @@ +/* Optimized stpcpy implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STPCPY +#include <sysdeps/powerpc/powerpc64/power8/strcpy.S> + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpncpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpncpy.S new file mode 100644 index 0000000000..c14d984dd0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpncpy.S @@ -0,0 +1,24 @@ +/* Optimized stpncpy implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define USE_AS_STPNCPY +#include <sysdeps/powerpc/powerpc64/power8/strncpy.S> + +weak_alias (__stpncpy, stpncpy) +libc_hidden_def (__stpncpy) +libc_hidden_builtin_def (stpncpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasecmp.S new file mode 100644 index 0000000000..88b17a6eb1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasecmp.S @@ -0,0 +1,457 @@ +/* Optimized strcasecmp implementation for PowerPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <locale-defines.h> + +/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */ + +#ifndef USE_AS_STRNCASECMP +# define __STRCASECMP __strcasecmp +# define STRCASECMP strcasecmp +#else +# define __STRCASECMP __strncasecmp +# define STRCASECMP strncasecmp +#endif +/* Convert 16 bytes to lowercase and compare */ +#define TOLOWER() \ + vaddubm v8, v4, v1; \ + vaddubm v7, v4, v3; \ + vcmpgtub v8, v8, v2; \ + vsel v4, v7, v4, v8; \ + vaddubm v8, v5, v1; \ + vaddubm v7, v5, v3; \ + vcmpgtub v8, v8, v2; \ + vsel v5, v7, v5, v8; \ + vcmpequb. v7, v5, v4; + +/* + * Get 16 bytes for unaligned case. + * reg1: Vector to hold next 16 bytes. + * reg2: Address to read from. + * reg3: Permute control vector. + * v8: Tmp vector used to mask unwanted bytes. + * v9: Tmp vector,0 when null is found on first 16 bytes + */ +#ifdef __LITTLE_ENDIAN__ +#define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vspltisb v8, -1; \ + vperm v8, v8, reg1, reg3; \ + vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, v9, reg1, reg3; +#else +#define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vspltisb v8, -1; \ + vperm v8, reg1, v8, reg3; \ + vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, reg1, v9, reg3; +#endif + +/* Check null in v4, v5 and convert to lower. */ +#define CHECKNULLANDCONVERT() \ + vcmpequb. v7, v0, v5; \ + beq cr6, 3f; \ + vcmpequb. 
v7, v0, v4; \ + beq cr6, 3f; \ + b L(null_found); \ + .align 4; \ +3: \ + TOLOWER() + +#ifdef _ARCH_PWR8 +# define VCLZD_V8_v7 vclzd v8, v7; +# define MFVRD_R3_V1 mfvrd r3, v1; +# define VSUBUDM_V9_V8 vsubudm v9, v9, v8; +# define VPOPCNTD_V8_V8 vpopcntd v8, v8; +# define VADDUQM_V7_V8 vadduqm v9, v7, v8; +#else +# define VCLZD_V8_v7 .long 0x11003fc2 +# define MFVRD_R3_V1 .long 0x7c230067 +# define VSUBUDM_V9_V8 .long 0x112944c0 +# define VPOPCNTD_V8_V8 .long 0x110047c3 +# define VADDUQM_V7_V8 .long 0x11274100 +#endif + + .machine power7 + +ENTRY (__STRCASECMP) +#ifdef USE_AS_STRNCASECMP + CALL_MCOUNT 3 +#else + CALL_MCOUNT 2 +#endif +#define rRTN r3 /* Return value */ +#define rSTR1 r10 /* 1st string */ +#define rSTR2 r4 /* 2nd string */ +#define rCHAR1 r6 /* Byte read from 1st string */ +#define rCHAR2 r7 /* Byte read from 2nd string */ +#define rADDR1 r8 /* Address of tolower(rCHAR1) */ +#define rADDR2 r12 /* Address of tolower(rCHAR2) */ +#define rLWR1 r8 /* Word tolower(rCHAR1) */ +#define rLWR2 r12 /* Word tolower(rCHAR2) */ +#define rTMP r9 +#define rLOC r11 /* Default locale address */ + + cmpd cr7, rRTN, rSTR2 + + /* Get locale address. */ + ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) + add rLOC, rTMP, __libc_tsd_LOCALE@tls + ld rLOC, 0(rLOC) + + mr rSTR1, rRTN + li rRTN, 0 + beqlr cr7 +#ifdef USE_AS_STRNCASECMP + cmpdi cr7, r5, 0 + beq cr7, L(retnull) + cmpdi cr7, r5, 16 + blt cr7, L(bytebybyte) +#endif + vspltisb v0, 0 + vspltisb v8, -1 + /* Check for null in initial characters. + Check max of 16 char depending on the alignment. + If null is present, proceed byte by byte. */ + lvx v4, 0, rSTR1 +#ifdef __LITTLE_ENDIAN__ + lvsr v10, 0, rSTR1 /* Compute mask. */ + vperm v9, v8, v4, v10 /* Mask bits that are not part of string. */ +#else + lvsl v10, 0, rSTR1 + vperm v9, v4, v8, v10 +#endif + vcmpequb. v9, v0, v9 /* Check for null bytes. */ + bne cr6, L(bytebybyte) + lvx v5, 0, rSTR2 + /* Calculate alignment. */ +#ifdef __LITTLE_ENDIAN__ + lvsr v6, 0, rSTR2 + vperm v9, v8, v5, v6 /* Mask bits that are not part of string. */ +#else + lvsl v6, 0, rSTR2 + vperm v9, v5, v8, v6 +#endif + vcmpequb. v9, v0, v9 /* Check for null bytes. */ + bne cr6, L(bytebybyte) + /* Check if locale has non ascii characters. */ + ld rTMP, 0(rLOC) + addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES + lwz rTMP, 0(r6) + cmpdi cr7, rTMP, 1 + beq cr7, L(bytebybyte) + + /* Load vector registers with values used for TOLOWER. */ + /* Load v1 = 0xbf, v2 = 0x19 v3 = 0x20 in each byte. */ + vspltisb v3, 2 + vspltisb v9, 4 + vsl v3, v3, v9 + vaddubm v1, v3, v3 + vnor v1, v1, v1 + vspltisb v2, 7 + vsububm v2, v3, v2 + + andi. rADDR1, rSTR1, 0xF + beq cr0, L(align) + addi r6, rSTR1, 16 + lvx v9, 0, r6 + /* Compute 16 bytes from previous two loads. */ +#ifdef __LITTLE_ENDIAN__ + vperm v4, v9, v4, v10 +#else + vperm v4, v4, v9, v10 +#endif +L(align): + andi. rADDR2, rSTR2, 0xF + beq cr0, L(align1) + addi r6, rSTR2, 16 + lvx v9, 0, r6 + /* Compute 16 bytes from previous two loads. */ +#ifdef __LITTLE_ENDIAN__ + vperm v5, v9, v5, v6 +#else + vperm v5, v5, v9, v6 +#endif +L(align1): + CHECKNULLANDCONVERT() + blt cr6, L(match) + b L(different) + .align 4 +L(match): + clrldi r6, rSTR1, 60 + subfic r7, r6, 16 +#ifdef USE_AS_STRNCASECMP + sub r5, r5, r7 +#endif + add rSTR1, rSTR1, r7 + add rSTR2, rSTR2, r7 + andi. 
rADDR2, rSTR2, 0xF + addi rSTR1, rSTR1, -16 + addi rSTR2, rSTR2, -16 + beq cr0, L(aligned) +#ifdef __LITTLE_ENDIAN__ + lvsr v6, 0, rSTR2 +#else + lvsl v6, 0, rSTR2 +#endif + /* There are 2 loops depending on the input alignment. + Each loop gets 16 bytes from s1 and s2, check for null, + convert to lowercase and compare. Loop till difference + or null occurs. */ +L(s1_align): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#ifdef USE_AS_STRNCASECMP + cmpdi cr7, r5, 16 + blt cr7, L(bytebybyte) + addi r5, r5, -16 +#endif + lvx v4, 0, rSTR1 + GET16BYTES(v5, rSTR2, v6) + CHECKNULLANDCONVERT() + blt cr6, L(s1_align) + b L(different) + .align 4 +L(aligned): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#ifdef USE_AS_STRNCASECMP + cmpdi cr7, r5, 16 + blt cr7, L(bytebybyte) + addi r5, r5, -16 +#endif + lvx v4, 0, rSTR1 + lvx v5, 0, rSTR2 + CHECKNULLANDCONVERT() + blt cr6, L(aligned) + + /* Calculate and return the difference. */ +L(different): + vaddubm v1, v3, v3 + vcmpequb v7, v0, v7 +#ifdef __LITTLE_ENDIAN__ + /* Count trailing zero. */ + vspltisb v8, -1 + VADDUQM_V7_V8 + vandc v8, v9, v7 + VPOPCNTD_V8_V8 + vspltb v6, v8, 15 + vcmpequb. v6, v6, v1 + blt cr6, L(shift8) +#else + /* Count leading zero. */ + VCLZD_V8_v7 + vspltb v6, v8, 7 + vcmpequb. v6, v6, v1 + blt cr6, L(shift8) + vsro v8, v8, v1 +#endif + b L(skipsum) + .align 4 +L(shift8): + vsumsws v8, v8, v0 +L(skipsum): +#ifdef __LITTLE_ENDIAN__ + /* Shift registers based on leading zero count. */ + vsro v6, v5, v8 + vsro v7, v4, v8 + /* Merge and move to GPR. */ + vmrglb v6, v6, v7 + vslo v1, v6, v1 + MFVRD_R3_V1 + /* Place the characters that are different in first position. */ + sldi rSTR2, rRTN, 56 + srdi rSTR2, rSTR2, 56 + sldi rSTR1, rRTN, 48 + srdi rSTR1, rSTR1, 56 +#else + vslo v6, v5, v8 + vslo v7, v4, v8 + vmrghb v1, v6, v7 + MFVRD_R3_V1 + srdi rSTR2, rRTN, 48 + sldi rSTR2, rSTR2, 56 + srdi rSTR2, rSTR2, 56 + srdi rSTR1, rRTN, 56 +#endif + subf rRTN, rSTR1, rSTR2 + extsw rRTN, rRTN + blr + + .align 4 + /* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of junk beyond + the end of the strings... */ +L(null_found): + vaddubm v10, v3, v3 +#ifdef __LITTLE_ENDIAN__ + /* Count trailing zero. */ + vspltisb v8, -1 + VADDUQM_V7_V8 + vandc v8, v9, v7 + VPOPCNTD_V8_V8 + vspltb v6, v8, 15 + vcmpequb. v6, v6, v10 + blt cr6, L(shift_8) +#else + /* Count leading zero. */ + VCLZD_V8_v7 + vspltb v6, v8, 7 + vcmpequb. v6, v6, v10 + blt cr6, L(shift_8) + vsro v8, v8, v10 +#endif + b L(skipsum1) + .align 4 +L(shift_8): + vsumsws v8, v8, v0 +L(skipsum1): + /* Calculate shift count based on count of zero. */ + vspltisb v10, 7 + vslb v10, v10, v10 + vsldoi v9, v0, v10, 1 + VSUBUDM_V9_V8 + vspltisb v8, 8 + vsldoi v8, v0, v8, 1 + VSUBUDM_V9_V8 + /* Shift and remove junk after null character. */ +#ifdef __LITTLE_ENDIAN__ + vslo v5, v5, v9 + vslo v4, v4, v9 +#else + vsro v5, v5, v9 + vsro v4, v4, v9 +#endif + /* Convert and compare 16 bytes. */ + TOLOWER() + blt cr6, L(retnull) + b L(different) + .align 4 +L(retnull): + li rRTN, 0 + blr + .align 4 +L(bytebybyte): + /* Unrolling loop for POWER: loads are done with 'lbz' plus + offset and string descriptors are only updated in the end + of loop unrolling. 
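L(different) above reduces the vcmpequb result to a byte index with vclzd (big-endian) or the vadduqm/vpopcntd sequence (little-endian) and then subtracts the two bytes. A scalar model of that extraction, assuming the doublewords already hold lowercased bytes in big-endian significance order:

#include <stdint.h>

static int
first_diff (uint64_t a, uint64_t b)   /* bytes already lowercased */
{
  uint64_t x = a ^ b;
  if (x == 0)
    return 0;
  unsigned int byte = __builtin_clzll (x) >> 3;  /* index of first mismatch */
  unsigned int sh = 56 - 8 * byte;
  return (int) ((a >> sh) & 0xff) - (int) ((b >> sh) & 0xff);
}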
*/ + ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ +#ifdef USE_AS_STRNCASECMP + rldicl rTMP, r5, 62, 2 + cmpdi cr7, rTMP, 0 + beq cr7, L(lessthan4) + mtctr rTMP +#endif +L(loop): + cmpdi rCHAR1, 0 /* *s1 == '\0' ? */ + sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ + sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ + lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ + lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ + cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */ + crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 != '\0') || (r == 1) */ + beq cr1, L(done) + lbz rCHAR1, 1(rSTR1) + lbz rCHAR2, 1(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 2(rSTR1) + lbz rCHAR2, 2(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 3(rSTR1) + lbz rCHAR2, 3(rSTR2) + cmpdi rCHAR1, 0 + /* Increment both string descriptors */ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ +#ifdef USE_AS_STRNCASECMP + bdnz L(loop) +#else + b L(loop) +#endif +#ifdef USE_AS_STRNCASECMP +L(lessthan4): + clrldi r5, r5, 62 + cmpdi cr7, r5, 0 + beq cr7, L(retnull) + mtctr r5 +L(loop1): + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + addi rSTR1, rSTR1, 1 + addi rSTR2, rSTR2, 1 + lbz rCHAR1, 0(rSTR1) + lbz rCHAR2, 0(rSTR2) + bdnz L(loop1) +#endif +L(done): + subf r0, rLWR2, rLWR1 + extsw rRTN, r0 + blr +END (__STRCASECMP) + +weak_alias (__STRCASECMP, STRCASECMP) +libc_hidden_builtin_def (__STRCASECMP) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c new file mode 100644 index 0000000000..0e746b7718 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c @@ -0,0 +1,29 @@ +/* Optimized strcasestr implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
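The byte-by-byte fallback above scales each character by 4 (sldi ..., 2) because the locale's tolower table holds 32-bit entries. A simplified C rendering of one step of that loop, with tbl standing for the LOCALE_CTYPE_TOLOWER array (the real table is reached through the locale object and also admits negative indexes, which this sketch ignores):

#include <stdint.h>

static int
casecmp_bytes (const unsigned char *s1, const unsigned char *s2,
               const int32_t *tbl)
{
  for (;; s1++, s2++)
    {
      int32_t l1 = tbl[*s1];     /* sldi rADDR1, rCHAR1, 2; lwzx rLWR1 */
      int32_t l2 = tbl[*s2];
      if (l1 != l2 || *s1 == '\0')
        return (int) (l1 - l2);  /* subf r0, rLWR2, rLWR1; extsw */
    }
}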
*/ + +#include <string.h> + +#define STRCASESTR __strcasestr_ppc +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(__name) + +#undef weak_alias +#define weak_alias(a,b) +extern __typeof (strcasestr) __strcasestr_ppc attribute_hidden; + +#include <string/strcasestr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr.S new file mode 100644 index 0000000000..6ac6572f3b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr.S @@ -0,0 +1,538 @@ +/* Optimized strcasestr implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <locale-defines.h> + +/* Char * [r3] strcasestr (char *s [r3], char * pat[r4]) */ + +/* The performance gain is obtained by comparing 16 bytes. */ + +/* When the first char of r4 is hit ITERATIONS times in r3 + fallback to default. */ +#define ITERATIONS 64 + +#ifndef STRCASESTR +# define STRCASESTR __strcasestr +#endif + +#ifndef STRLEN +/* For builds without IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRLEN __GI_strlen +# else +# define STRLEN strlen +# endif +#endif + +#ifndef STRNLEN +/* For builds without IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRNLEN __GI_strnlen +# else +# define STRNLEN __strnlen +# endif +#endif + +#ifndef STRCHR +# ifdef SHARED +# define STRCHR __GI_strchr +# else +# define STRCHR strchr +# endif +#endif + +/* Convert 16 bytes of v4 and reg to lowercase and compare. */ +#define TOLOWER(reg) \ + vcmpgtub v6, v4, v1; \ + vcmpgtub v7, v2, v4; \ + vand v8, v7, v6; \ + vand v8, v8, v3; \ + vor v4, v8, v4; \ + vcmpgtub v6, reg, v1; \ + vcmpgtub v7, v2, reg; \ + vand v8, v7, v6; \ + vand v8, v8, v3; \ + vor reg, v8, reg; \ + vcmpequb. v6, reg, v4; + +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#ifdef _ARCH_PWR8 +#define VCLZD_V8_v7 vclzd v8, v7; +#else +#define VCLZD_V8_v7 .long 0x11003fc2 +#endif + +#define FRAMESIZE (FRAME_MIN_SIZE+48) +/* TODO: change this to .machine power8 when the minimum required binutils + allows it. */ + .machine power7 +EALIGN (STRCASESTR, 4, 0) + CALL_MCOUNT 2 + mflr r0 /* Load link register LR to r0. */ + std r31, -8(r1) /* Save callers register r31. */ + std r30, -16(r1) /* Save callers register r30. */ + std r29, -24(r1) /* Save callers register r29. */ + std r28, -32(r1) /* Save callers register r28. */ + std r27, -40(r1) /* Save callers register r27. */ + std r0, 16(r1) /* Store the link register. 
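A scalar model of the TOLOWER(reg) macro defined above: with v1 = 64 ('A' - 1), v2 = 91 ('Z' + 1) and v3 = 0x20 replicated across the vector (set up in the code that follows), a byte gets 0x20 ORed in exactly when both vcmpgtub range tests pass:

static unsigned char
to_lower_ascii (unsigned char c)
{
  int in_range = (c > 'A' - 1) && (c < 'Z' + 1);  /* the two vcmpgtub tests */
  return c | (in_range ? 0x20 : 0);               /* vand with v3, then vor */
}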
*/ + cfi_offset(r31, -8) + cfi_offset(r30, -16) + cfi_offset(r29, -24) + cfi_offset(r28, -32) + cfi_offset(r27, -40) + cfi_offset(lr, 16) + stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */ + cfi_adjust_cfa_offset(FRAMESIZE) + + dcbt 0, r3 + dcbt 0, r4 + cmpdi cr7, r3, 0 /* Input validation. */ + beq cr7, L(retnull) + cmpdi cr7, r4, 0 + beq cr7, L(retnull) + + mr r29, r3 + mr r30, r4 + /* Load first byte from r4 and check if its null. */ + lbz r6, 0(r4) + cmpdi cr7, r6, 0 + beq cr7, L(ret_r3) + + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r9, r10, __libc_tsd_LOCALE@tls + ld r9, 0(r9) + ld r9, LOCALE_CTYPE_TOUPPER(r9) + sldi r10, r6, 2 /* Convert to upper case. */ + lwzx r28, r9, r10 + + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r11, r10, __libc_tsd_LOCALE@tls + ld r11, 0(r11) + ld r11, LOCALE_CTYPE_TOLOWER(r11) + sldi r10, r6, 2 /* Convert to lower case. */ + lwzx r27, r11, r10 + + /* Check if the first char is present. */ + mr r4, r27 + bl STRCHR + nop + mr r5, r3 + mr r3, r29 + mr r29, r5 + mr r4, r28 + bl STRCHR + nop + cmpdi cr7, r29, 0 + beq cr7, L(firstpos) + cmpdi cr7, r3, 0 + beq cr7, L(skipcheck) + cmpw cr7, r3, r29 + ble cr7, L(firstpos) + /* Move r3 to the first occurence. */ +L(skipcheck): + mr r3, r29 +L(firstpos): + mr r29, r3 + + sldi r9, r27, 8 + or r28, r9, r28 + /* Reg r27 is used to count the number of iterations. */ + li r27, 0 + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) + + /* Find the length of pattern. */ + mr r3, r30 + bl STRLEN + nop + + cmpdi cr7, r3, 0 /* If search str is null. */ + beq cr7, L(ret_r3) + + mr r31, r3 + mr r4, r3 + mr r3, r29 + bl STRNLEN + nop + + cmpd cr7, r3, r31 /* If len(r3) < len(r4). */ + blt cr7, L(retnull) + + mr r3, r29 + + /* Locales not matching ASCII for single bytes. */ + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r9, r10, __libc_tsd_LOCALE@tls + ld r9, 0(r9) + ld r7, 0(r9) + addi r7, r7, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES + lwz r8, 0(r7) + cmpdi cr7, r8, 1 + beq cr7, L(bytebybyte) + + /* If len(r4) < 16 handle byte by byte. */ + /* For shorter strings we will not use vector registers. */ + cmpdi cr7, r31, 16 + blt cr7, L(bytebybyte) + + /* Comparison values used for TOLOWER. */ + /* Load v1 = 64('A' - 1), v2 = 91('Z' + 1), v3 = 32 in each byte. */ + vspltish v0, 0 + vspltisb v5, 2 + vspltisb v4, 4 + vsl v3, v5, v4 + vaddubm v1, v3, v3 + vspltisb v5, 15 + vaddubm v2, v5, v5 + vaddubm v2, v1, v2 + vspltisb v4, -3 + vaddubm v2, v2, v4 + + /* + 1. Load 16 bytes from r3 and r4 + 2. Check if there is null, If yes, proceed byte by byte path. + 3. Else,Convert both to lowercase and compare. + 4. If they are same proceed to 1. + 5. If they dont match, find if first char of r4 is present in the + loaded 16 byte of r3. + 6. If yes, move position, load next 16 bytes of r3 and proceed to 2. + */ + + mr r8, r3 /* Save r3 for future use. */ + mr r4, r30 /* Restore r4. */ + clrldi r10, r4, 60 + lvx v5, 0, r4 /* Load 16 bytes from r4. */ + cmpdi cr7, r10, 0 + beq cr7, L(begin2) + /* If r4 is unaligned, load another 16 bytes. */ +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r4 +#else + lvsl v7, 0, r4 +#endif + addi r5, r4, 16 + lvx v9, 0, r5 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v9, v5, v7 +#else + vperm v5, v5, v9, v7 +#endif +L(begin2): + lvx v4, 0, r3 + vcmpequb. v7, v0, v4 /* Check for null. 
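A self-contained C model of the control flow set up above: probe for the pattern's first character in both cases (the two STRCHR calls), keep the earlier hit, and bound the number of restarts with the ITERATIONS counter (r27) before punting to the generic routine. match_here is a stand-in for the vector compare, and the final loop models the __strcasestr_ppc fallback:

#include <ctype.h>
#include <string.h>

static int
match_here (const char *h, const char *n)   /* models the vector compare */
{
  for (; *n != '\0'; h++, n++)
    if (tolower ((unsigned char) *h) != tolower ((unsigned char) *n))
      return 0;
  return 1;
}

static char *
capped_strcasestr (const char *h, const char *n)
{
  if (n[0] == '\0')
    return (char *) h;
  int lo = tolower ((unsigned char) n[0]);
  int up = toupper ((unsigned char) n[0]);
  for (int iters = 0; iters < 64; iters++)  /* ITERATIONS */
    {
      const char *c1 = strchr (h, lo);      /* bl STRCHR, twice */
      const char *c2 = strchr (h, up);
      const char *cand = c1 == NULL ? c2
                         : (c2 == NULL || c1 < c2) ? c1 : c2;
      if (cand == NULL)
        return NULL;
      if (match_here (cand, n))
        return (char *) cand;
      h = cand + 1;
    }
  for (; *h != '\0'; h++)                   /* models bl __strcasestr_ppc */
    if (match_here (h, n))
      return (char *) h;
  return NULL;
}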
*/ + beq cr6, L(nullchk6) + b L(trailcheck) + + .align 4 +L(nullchk6): + clrldi r10, r3, 60 + cmpdi cr7, r10, 0 + beq cr7, L(next16) +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r3 +#else + lvsl v7, 0, r3 +#endif + addi r5, r3, 16 + /* If r3 is unaligned, load another 16 bytes. */ + lvx v10, 0, r5 +#ifdef __LITTLE_ENDIAN__ + vperm v4, v10, v4, v7 +#else + vperm v4, v4, v10, v7 +#endif +L(next16): + vcmpequb. v6, v0, v5 /* Check for null. */ + beq cr6, L(nullchk) + b L(trailcheck) + + .align 4 +L(nullchk): + vcmpequb. v6, v0, v4 + beq cr6, L(nullchk1) + b L(retnull) + + .align 4 +L(nullchk1): + /* Convert both v3 and v4 to lower. */ + TOLOWER(v5) + /* If both are same, branch to match. */ + blt cr6, L(match) + /* Find if the first char is present in next 15 bytes. */ +#ifdef __LITTLE_ENDIAN__ + vspltb v6, v5, 15 + vsldoi v7, v0, v4, 15 +#else + vspltb v6, v5, 0 + vspltisb v7, 8 + vslo v7, v4, v7 +#endif + vcmpequb v7, v6, v7 + vcmpequb. v6, v0, v7 + /* Shift r3 by 16 bytes and proceed. */ + blt cr6, L(shift16) + VCLZD_V8_v7 +#ifdef __LITTLE_ENDIAN__ + vspltb v6, v8, 15 +#else + vspltb v6, v8, 7 +#endif + vcmpequb. v6, v6, v1 + /* Shift r3 by 8 bytes and proceed. */ + blt cr6, L(shift8) + b L(begin) + + .align 4 +L(match): + /* There is a match of 16 bytes, check next bytes. */ + cmpdi cr7, r31, 16 + mr r29, r3 + beq cr7, L(ret_r3) + +L(secondmatch): + addi r3, r3, 16 + addi r4, r4, 16 + /* Load next 16 bytes of r3 and r4 and compare. */ + clrldi r10, r4, 60 + cmpdi cr7, r10, 0 + beq cr7, L(nextload) + /* Handle unaligned case. */ + vor v6, v9, v9 + vcmpequb. v7, v0, v6 + beq cr6, L(nullchk2) + b L(trailcheck) + + .align 4 +L(nullchk2): +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r4 +#else + lvsl v7, 0, r4 +#endif + addi r5, r4, 16 + /* If r4 is unaligned, load another 16 bytes. */ + lvx v9, 0, r5 +#ifdef __LITTLE_ENDIAN__ + vperm v11, v9, v6, v7 +#else + vperm v11, v6, v9, v7 +#endif + b L(compare) + + .align 4 +L(nextload): + lvx v11, 0, r4 +L(compare): + vcmpequb. v7, v0, v11 + beq cr6, L(nullchk3) + b L(trailcheck) + + .align 4 +L(nullchk3): + clrldi r10, r3, 60 + cmpdi cr7, r10, 0 + beq cr7, L(nextload1) + /* Handle unaligned case. */ + vor v4, v10, v10 + vcmpequb. v7, v0, v4 + beq cr6, L(nullchk4) + b L(retnull) + + .align 4 +L(nullchk4): +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r3 +#else + lvsl v7, 0, r3 +#endif + addi r5, r3, 16 + /* If r3 is unaligned, load another 16 bytes. */ + lvx v10, 0, r5 +#ifdef __LITTLE_ENDIAN__ + vperm v4, v10, v4, v7 +#else + vperm v4, v4, v10, v7 +#endif + b L(compare1) + + .align 4 +L(nextload1): + lvx v4, 0, r3 +L(compare1): + vcmpequb. v7, v0, v4 + beq cr6, L(nullchk5) + b L(retnull) + + .align 4 +L(nullchk5): + /* Convert both v3 and v4 to lower. */ + TOLOWER(v11) + /* If both are same, branch to secondmatch. */ + blt cr6, L(secondmatch) + /* Continue the search. */ + b L(begin) + + .align 4 +L(trailcheck): + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r11, r10, __libc_tsd_LOCALE@tls + ld r11, 0(r11) + ld r11, LOCALE_CTYPE_TOLOWER(r11) +L(loop2): + lbz r5, 0(r3) /* Load byte from r3. */ + lbz r6, 0(r4) /* Load next byte from r4. */ + cmpdi cr7, r6, 0 /* Is it null? */ + beq cr7, L(updater3) + cmpdi cr7, r5, 0 /* Is it null? */ + beq cr7, L(retnull) /* If yes, return. */ + addi r3, r3, 1 + addi r4, r4, 1 /* Increment r4. */ + sldi r10, r5, 2 /* Convert to lower case. */ + lwzx r10, r11, r10 + sldi r7, r6, 2 /* Convert to lower case. */ + lwzx r7, r11, r7 + cmpw cr7, r7, r10 /* Compare with byte from r4. 
*/ + bne cr7, L(begin) + b L(loop2) + + .align 4 +L(shift8): + addi r8, r8, 7 + b L(begin) + .align 4 +L(shift16): + addi r8, r8, 15 + .align 4 +L(begin): + addi r8, r8, 1 + mr r3, r8 + /* When our iterations exceed ITERATIONS,fall back to default. */ + addi r27, r27, 1 + cmpdi cr7, r27, ITERATIONS + beq cr7, L(default) + mr r4, r30 /* Restore r4. */ + b L(begin2) + + /* Handling byte by byte. */ + .align 4 +L(loop1): + mr r3, r8 + addi r27, r27, 1 + cmpdi cr7, r27, ITERATIONS + beq cr7, L(default) + mr r29, r8 + srdi r4, r28, 8 + /* Check if the first char is present. */ + bl STRCHR + nop + mr r5, r3 + mr r3, r29 + mr r29, r5 + sldi r4, r28, 56 + srdi r4, r4, 56 + bl STRCHR + nop + cmpdi cr7, r29, 0 + beq cr7, L(nextpos) + cmpdi cr7, r3, 0 + beq cr7, L(skipcheck1) + cmpw cr7, r3, r29 + ble cr7, L(nextpos) + /* Move r3 to first occurence. */ +L(skipcheck1): + mr r3, r29 +L(nextpos): + mr r29, r3 + cmpdi cr7, r3, 0 + ble cr7, L(retnull) +L(bytebybyte): + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r11, r10, __libc_tsd_LOCALE@tls + ld r11, 0(r11) + ld r11, LOCALE_CTYPE_TOLOWER(r11) + mr r4, r30 /* Restore r4. */ + mr r8, r3 /* Save r3. */ + addi r8, r8, 1 + +L(loop): + addi r3, r3, 1 + lbz r5, 0(r3) /* Load byte from r3. */ + addi r4, r4, 1 /* Increment r4. */ + lbz r6, 0(r4) /* Load next byte from r4. */ + cmpdi cr7, r6, 0 /* Is it null? */ + beq cr7, L(updater3) + cmpdi cr7, r5, 0 /* Is it null? */ + beq cr7, L(retnull) /* If yes, return. */ + sldi r10, r5, 2 /* Convert to lower case. */ + lwzx r10, r11, r10 + sldi r7, r6, 2 /* Convert to lower case. */ + lwzx r7, r11, r7 + cmpw cr7, r7, r10 /* Compare with byte from r4. */ + bne cr7, L(loop1) + b L(loop) + + /* Handling return values. */ + .align 4 +L(updater3): + subf r3, r31, r3 /* Reduce r31 (len of r4) from r3. */ + b L(end) + + .align 4 +L(ret_r3): + mr r3, r29 /* Return point of match. */ + b L(end) + + .align 4 +L(retnull): + li r3, 0 /* Substring was not found. */ + b L(end) + + .align 4 +L(default): + mr r4, r30 + bl __strcasestr_ppc + nop + + .align 4 +L(end): + addi r1, r1, FRAMESIZE /* Restore stack pointer. */ + cfi_adjust_cfa_offset(-FRAMESIZE) + ld r0, 16(r1) /* Restore the saved link register. */ + ld r27, -40(r1) + ld r28, -32(r1) + ld r29, -24(r1) /* Restore callers save register r29. */ + ld r30, -16(r1) /* Restore callers save register r30. */ + ld r31, -8(r1) /* Restore callers save register r31. */ + cfi_restore(lr) + cfi_restore(r27) + cfi_restore(r28) + cfi_restore(r29) + cfi_restore(r30) + cfi_restore(r31) + mtlr r0 /* Branch to link register. */ + blr +END (STRCASESTR) + +weak_alias (__strcasestr, strcasestr) +libc_hidden_def (__strcasestr) +libc_hidden_builtin_def (strcasestr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchr.S new file mode 100644 index 0000000000..e0c185c162 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchr.S @@ -0,0 +1,377 @@ +/* Optimized strchr implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifdef USE_AS_STRCHRNUL +# ifndef STRCHRNUL +# define FUNC_NAME __strchrnul +# else +# define FUNC_NAME STRCHRNUL +# endif +#else +# ifndef STRCHR +# define FUNC_NAME strchr +# else +# define FUNC_NAME STRCHR +# endif +#endif /* !USE_AS_STRCHRNUL */ + +/* int [r3] strchr (char *s [r3], int c [r4]) */ +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) +/* TODO: change this to .machine power8 when the minimum required binutils + allows it. */ + .machine power7 +ENTRY (FUNC_NAME) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r9,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r9,r4 + cmpb r7,r9,r0 + or r5,r10,r11 + or r9,r6,r7 + or r12,r5,r9 + cmpdi cr7,r12,0 + beq cr7,L(vector) + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. 
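+
+	   As an illustrative C sketch (not part of the original source;
+	   `mask' stands for the cmpb result, which holds 0xff in each
+	   matching byte position), the address computation performed at
+	   L(done) below is roughly:
+
+	   #ifdef __LITTLE_ENDIAN__
+	     off = __builtin_ctzll (mask) >> 3;
+	   #else
+	     off = __builtin_clzll (mask) >> 3;
+	   #endif
+	     return (char *) aligned_s + off;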
*/ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 +#ifdef USE_AS_STRCHRNUL + mr r5, r9 +#endif + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done): +#ifdef USE_AS_STRCHRNUL + mr r10, r5 +#endif +#ifdef __LITTLE_ENDIAN__ + addi r3,r10,-1 + andc r3,r3,r10 + popcntd r0,r3 +# ifndef USE_AS_STRCHRNUL + addi r4,r11,-1 + andc r4,r4,r11 + cmpld cr7,r3,r4 + bgt cr7,L(no_match) +# endif +#else + cntlzd r0,r10 /* Count leading zeros before c matches. */ +# ifndef USE_AS_STRCHRNUL + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +# endif +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + blr + + /* Check the first 32B in GPR's and move to vectorized loop. */ + .p2align 5 +L(vector): + addi r3, r8, 8 + andi. r10, r3, 31 + bne cr0, L(loop) + vspltisb v0, 0 + /* Precompute vbpermq constant. */ + vspltisb v10, 3 + lvsl v11, r0, r0 + vslb v10, v11, v10 + MTVRD(v1,r4) + li r5, 16 + vspltb v1, v1, 7 + /* Compare 32 bytes in each loop. */ +L(continue): + lvx v4, 0, r3 + lvx v5, r3, r5 + vcmpequb v2, v0, v4 + vcmpequb v3, v0, v5 + vcmpequb v6, v1, v4 + vcmpequb v7, v1, v5 + vor v8, v2, v3 + vor v9, v6, v7 + vor v11, v8, v9 + vcmpequb. v11, v0, v11 + addi r3, r3, 32 + blt cr6, L(continue) + /* One (or both) of the quadwords contains a c/null byte. */ + addi r3, r3, -32 +#ifndef USE_AS_STRCHRNUL + vcmpequb. v11, v0, v9 + blt cr6, L(no_match) +#endif + /* Permute the first bit of each byte into bits 48-63. */ + VBPERMQ(v2, v2, v10) + VBPERMQ(v3, v3, v10) + VBPERMQ(v6, v6, v10) + VBPERMQ(v7, v7, v10) + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v3, v3, v3, 2 + vsldoi v7, v7, v7, 2 +#else + vsldoi v2, v2, v2, 6 + vsldoi v3, v3, v3, 4 + vsldoi v6, v6, v6, 6 + vsldoi v7, v7, v7, 4 +#endif + + /* Merge the results and move to a GPR. */ + vor v1, v3, v2 + vor v2, v6, v7 + vor v4, v1, v2 + MFVRD(r5, v4) +#ifdef __LITTLE_ENDIAN__ + addi r6, r5, -1 + andc r6, r6, r5 + popcntd r6, r6 +#else + cntlzd r6, r5 /* Count leading zeros before the match. */ +#endif + add r3, r3, r6 /* Compute final length. */ + /* Return NULL if null found before c. */ +#ifndef USE_AS_STRCHRNUL + lbz r4, 0(r3) + cmpdi cr7, r4, 0 + beq cr7, L(no_match) +#endif + blr + +#ifndef USE_AS_STRCHRNUL + .align 4 +L(no_match): + li r3,0 + blr +#endif + +/* We are here because strchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. 
Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(vector1) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr + .p2align 5 +L(vector1): + addi r3, r8, 8 + andi. r10, r3, 31 + bne cr0, L(loop_null) + vspltisb v8, -1 + vspltisb v0, 0 + vspltisb v10, 3 + lvsl v11, r0, r0 + vslb v10, v11, v10 + li r5, 16 +L(continue1): + lvx v4, 0, r3 + lvx v5, r3, r5 + vcmpequb v2, v0, v4 + vcmpequb v3, v0, v5 + vor v8, v2, v3 + vcmpequb. v11, v0, v8 + addi r3, r3, 32 + blt cr6, L(continue1) + addi r3, r3, -32 +L(end1): + VBPERMQ(v2, v2, v10) + VBPERMQ(v3, v3, v10) + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v3, v3, v3, 2 +#else + vsldoi v2, v2, v2, 6 + vsldoi v3, v3, v3, 4 +#endif + + /* Merge the results and move to a GPR. */ + vor v4, v3, v2 + MFVRD(r5, v4) +#ifdef __LITTLE_ENDIAN__ + addi r6, r5, -1 + andc r6, r6, r5 + popcntd r6, r6 +#else + cntlzd r6, r5 /* Count leading zeros before the match. */ +#endif + add r3, r3, r6 /* Compute final length. */ + blr +END (FUNC_NAME) + +#ifndef USE_AS_STRCHRNUL +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchrnul.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchrnul.S new file mode 100644 index 0000000000..3bf4b275dd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchrnul.S @@ -0,0 +1,23 @@ +/* Optimized strchrnul implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define USE_AS_STRCHRNUL 1 +#include <sysdeps/powerpc/powerpc64/power8/strchr.S> + +weak_alias (__strchrnul,strchrnul) +libc_hidden_builtin_def (__strchrnul) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcmp.S new file mode 100644 index 0000000000..770484f1e1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcmp.S @@ -0,0 +1,247 @@ +/* Optimized strcmp implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRCMP +# define STRCMP strcmp +#endif + +/* Implements the function + + size_t [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment. Although recent powerpc64 uses + 64K as default, the page cross handling assumes minimum page size of + 4k. */ + +EALIGN (STRCMP, 4, 0) + li r0,0 + + /* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using + the code: + + (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) + + with PAGE_SIZE being 4096 and ITER_SIZE begin 16. */ + + rldicl r7,r3,0,52 + rldicl r9,r4,0,52 + cmpldi cr7,r7,4096-16 + bgt cr7,L(pagecross_check) + cmpldi cr5,r9,4096-16 + bgt cr5,L(pagecross_check) + + /* For short string up to 16 bytes, load both s1 and s2 using + unaligned dwords and compare. */ + ld r8,0(r3) + ld r10,0(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + ld r8,8(r3) + ld r10,8(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + addi r7,r3,16 + addi r4,r4,16 + +L(align_8b): + /* Now it has checked for first 16 bytes, align source1 to doubleword + and adjust source2 address. */ + rldicl r9,r7,0,61 /* source1 alignment to doubleword */ + subf r4,r9,r4 /* Adjust source2 address based on source1 + alignment. */ + rldicr r7,r7,0,60 /* Align source1 to doubleword. */ + + /* At this point, source1 alignment is 0 and source2 alignment is + between 0 and 7. Check is source2 alignment is 0, meaning both + sources have the same alignment. */ + andi. r9,r4,0x7 + bne cr0,L(loop_diff_align) + + /* If both source1 and source2 are doubleword aligned, there is no + need for page boundary cross checks. */ + + ld r8,0(r7) + ld r10,0(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + .align 4 +L(loop_equal_align): + ld r8,8(r7) + ld r10,8(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + ld r8,16(r7) + ld r10,16(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + ldu r8,24(r7) + ldu r10,24(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. 
r9,r12,r11
+	bne	cr0,L(different_nocmpb)
+
+	b	L(loop_equal_align)
+
+	/* A zero byte was found in r8 (s1 dword), r9 contains the cmpb
+	   result and r10 the dword from s2.  The code isolates the bytes
+	   up to the end (including the '\0'), masking the remaining ones
+	   with 0xFF:
+
+	   #if __LITTLE_ENDIAN__
+	     (__builtin_ffsl (x) - 1) = counting trailing zero bits
+	     r9 = (__builtin_ffsl (r9) - 1) + 8;
+	     r9 = -1UL << r9
+	   #else
+	     r9 = __builtin_clzl (r9) + 8;
+	     r9 = -1UL >> r9
+	   #endif
+	   r8 = r8 | r9
+	   r10 = r10 | r9  */
+
+#ifdef __LITTLE_ENDIAN__
+	nor	r9,r9,r9
+L(different_nocmpb):
+	neg	r3,r9
+	and	r9,r9,r3
+	cntlzd	r9,r9
+	subfic	r9,r9,63
+#else
+	not	r9,r9
+L(different_nocmpb):
+	cntlzd	r9,r9
+	subfic	r9,r9,56
+#endif
+	srd	r3,r8,r9
+	srd	r10,r10,r9
+	rldicl	r10,r10,0,56
+	rldicl	r3,r3,0,56
+	subf	r3,r10,r3
+	extsw	r3,r3
+	blr
+
+	.align	4
+L(pagecross_check):
+	subfic	r9,r9,4096
+	subfic	r7,r7,4096
+	cmpld	cr7,r7,r9
+	bge	cr7,L(pagecross)
+	mr	r7,r9
+
+	/* If the unaligned 16-byte read crosses a 4K page boundary, a
+	   simple byte-by-byte comparison is used until the page alignment
+	   for s1 is reached.  */
+L(pagecross):
+	add	r7,r3,r7
+	subf	r9,r3,r7
+	mtctr	r9
+
+	.align	4
+L(pagecross_loop):
+	/* Load a byte from s1 and s2, check whether *s1 is equal to *s2
+	   and whether *s1 is '\0'.  */
+	lbz	r9,0(r3)
+	lbz	r10,0(r4)
+	addi	r3,r3,1
+	addi	r4,r4,1
+	cmplw	cr7,r9,r10
+	cmpdi	cr5,r9,0
+	bne	cr7,L(pagecross_ne)
+	beq	cr5,L(pagecross_nullfound)
+	bdnz	L(pagecross_loop)
+	b	L(align_8b)
+
+	.align	4
+	/* The unaligned read of source2 will cross a 4K page boundary,
+	   and the different byte or NULL may be in the remaining page
+	   bytes.  Since it cannot use an unaligned load, the algorithm
+	   reads and compares 8 bytes to keep source1 doubleword aligned.  */
+L(check_source2_byte):
+	li	r9,8
+	mtctr	r9
+
+	.align	4
+L(check_source2_byte_loop):
+	lbz	r9,0(r7)
+	lbz	r10,0(r4)
+	addi	r7,r7,1
+	addi	r4,r4,1
+	cmplw	cr7,r9,r10
+	cmpdi	cr5,r9,0
+	bne	cr7,L(pagecross_ne)
+	beq	cr5,L(pagecross_nullfound)
+	bdnz	L(check_source2_byte_loop)
+
+	/* If source2 is unaligned to doubleword, the code needs to check
+	   on each iteration if the unaligned doubleword access will cross
+	   a 4k page boundary.  */
+	.align	5
+L(loop_unaligned):
+	ld	r8,0(r7)
+	ld	r10,0(r4)
+	cmpb	r12,r8,r0
+	cmpb	r11,r8,r10
+	orc.	r9,r12,r11
+	bne	cr0,L(different_nocmpb)
+	addi	r7,r7,8
+	addi	r4,r4,8
+
+L(loop_diff_align):
+	/* Check if [src2]+8 crosses a 4k page boundary:
+
+	     srcin2 % PAGE_SIZE > (PAGE_SIZE - 8)
+
+	   with PAGE_SIZE being 4096.  */
+	rldicl	r9,r4,0,52
+	cmpldi	cr7,r9,4088
+	ble	cr7,L(loop_unaligned)
+	b	L(check_source2_byte)
+
+	.align	4
+L(pagecross_ne):
+	extsw	r3,r9
+	mr	r9,r10
+L(pagecross_retdiff):
+	subf	r9,r9,r3
+	extsw	r3,r9
+	blr
+
+	.align	4
+L(pagecross_nullfound):
+	li	r3,0
+	b	L(pagecross_retdiff)
+END (STRCMP)
+libc_hidden_builtin_def (strcmp)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcpy.S
new file mode 100644
index 0000000000..7f2cee4b1b
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcpy.S
@@ -0,0 +1,270 @@
+/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER8.
+   Copyright (C) 2015-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifdef USE_AS_STPCPY +# ifndef STPCPY +# define FUNC_NAME __stpcpy +# else +# define FUNC_NAME STPCPY +# endif +#else +# ifndef STRCPY +# define FUNC_NAME strcpy +# else +# define FUNC_NAME STRCPY +# endif +#endif /* !USE_AS_STPCPY */ + +/* Implements the function + + char * [r3] strcpy (char *dest [r3], const char *src [r4]) + + or + + char * [r3] stpcpy (char *dest [r3], const char *src [r4]) + + if USE_AS_STPCPY is defined. + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment. Although recent powerpc64 uses + 64K as default, the page cross handling assumes minimum page size of + 4k. */ + + .machine power7 +EALIGN (FUNC_NAME, 4, 0) + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + /* Check if the [src]+15 will cross a 4K page by checking if the bit + indicating the page size changes. Basically: + + uint64_t srcin = (uint64_t)src; + uint64_t ob = srcin & 4096UL; + uint64_t nb = (srcin+15UL) & 4096UL; + if (ob ^ nb) + goto pagecross; */ + + addi r9,r4,15 + xor r9,r9,r4 + rlwinm. r9,r9,0,19,19 + bne L(pagecross) + + /* For short string (less than 16 bytes), just calculate its size as + strlen and issues a memcpy if null is found. */ + mr r7,r4 + ld r12,0(r7) /* Load doubleword from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + ldu r8,8(r7) + cmpb r10,r8,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + b L(loop_before) + + .align 4 +L(pagecross): + clrrdi r7,r4,3 /* Align the address to doubleword boundary. */ + rlwinm r6,r4,3,26,28 /* Calculate padding. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + ld r12,0(r7) /* Load doubleword from memory. */ +#ifdef __LITTLE_ENDIAN__ + sld r5,r5,r6 +#else + srd r5,r5,r6 /* MASK = MASK >> padding. */ +#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + ldu r6,8(r7) + cmpb r10,r6,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + ld r12,0(r7) + cmpb r10,r12,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + ldu r6,8(r7) + cmpb r10,r6,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + /* We checked for 24 - x bytes, with x being the source alignment + (0 <= x <= 16), and no zero has been found. Start the loop + copy with doubleword aligned address. */ + mr r7,r4 + ld r12, 0(r7) + ldu r8, 8(r7) + +L(loop_before): + /* Save the two doublewords readed from source and align the source + to 16 bytes for the loop. */ + mr r11,r3 + std r12,0(r11) + std r8,8(r11) + addi r11,r11,16 + rldicl r9,r4,0,60 + subf r7,r9,r7 + subf r11,r9,r11 + b L(loop_start) + + .align 5 +L(loop): + std r12, 0(r11) + std r6, 8(r11) + addi r11,r11,16 +L(loop_start): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. 
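+
+	   Roughly, in C (an illustrative sketch only, not part of the
+	   original source, with cmpb written as a pseudo-intrinsic that
+	   returns 0xff in each byte position where its operands match):
+
+	     w1 = *(uint64_t *) (src + 8);
+	     w2 = *(uint64_t *) (src + 16);
+	     if ((cmpb (w1, 0) | cmpb (w2, 0)) != 0)
+	       goto found;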
*/ + + ld r12, 8(r7) + ldu r6, 16(r7) + cmpb r10,r12,r0 + cmpb r9,r6,r0 + or r8,r9,r10 /* Merge everything in one doubleword. */ + cmpdi cr7,r8,0 + beq cr7,L(loop) + + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + addi r4,r7,-8 + cmpdi cr6,r10,0 + addi r7,r7,-8 + bne cr6,L(done2) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r9 + addi r7,r7,8 + b L(done2) + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the length. */ +L(done): + mr r11,r3 +L(done2): +#ifdef __LITTLE_ENDIAN__ + addi r9, r10, -1 /* Form a mask from trailing zeros. */ + andc r9, r9, r10 + popcntd r6, r9 /* Count the bits in the mask. */ +#else + cntlzd r6,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r4,r7 + srdi r6,r6,3 /* Convert leading/trailing zeros to bytes. */ + add r8,r5,r6 /* Compute final length. */ +#ifdef USE_AS_STPCPY + /* stpcpy returns the dest address plus the size not counting the + final '\0'. */ + add r3,r11,r8 +#endif + addi r8,r8,1 /* Final '/0'. */ + + cmpldi cr6,r8,8 + mtocrf 0x01,r8 + ble cr6,L(copy_LE_8) + + cmpldi cr1,r8,16 + blt cr1,8f + + /* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + /* At least 6 bytes to go. */ + blt cr1,8f + + /* Copy 16 bytes. */ + ld r6,0(r4) + ld r8,8(r4) + addi r4,r4,16 + std r6,0(r11) + std r8,8(r11) + addi r11,r11,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4) + ld r6,0(r4) + addi r4,r4,8 + std r6,0(r11) + addi r11,r11,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + lwz r6,0(r4) + stw r6,0(r11) + bf 30,L(tail5) + lhz r7,4(r4) + sth r7,4(r11) + bflr 31 + lbz r8,6(r4) + stb r8,6(r11) + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + lhz r6,0(r4) + sth r6,0(r11) + bflr 31 + lbz r7,2(r4) + stb r7,2(r11) + blr + + .align 4 +L(tail5): + bf 31,1f + lbz r6,4(r4) + stb r6,4(r11) + blr + + .align 4 +1: + bflr 31 + lbz r6,0(r4) + stb r6,0(r11) + blr + +/* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,L(tail4) + ld r6,0(r4) + std r6,0(r11) + blr +END (FUNC_NAME) + +#ifndef USE_AS_STPCPY +libc_hidden_builtin_def (strcpy) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcspn.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcspn.S new file mode 100644 index 0000000000..c9a7a2e3c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcspn.S @@ -0,0 +1,20 @@ +/* Optimized strcspn implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STRCSPN 1 +#include <sysdeps/powerpc/powerpc64/power8/strspn.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strlen.S new file mode 100644 index 0000000000..8f4a1fc1dc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strlen.S @@ -0,0 +1,301 @@ +/* Optimized strlen implementation for PowerPC64/POWER8 using a vectorized + loop. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +/* int [r3] strlen (char *s [r3]) */ + +#ifndef STRLEN +# define STRLEN strlen +#endif + +/* TODO: change this to .machine power8 when the minimum required binutils + allows it. */ + .machine power7 +EALIGN (STRLEN, 4, 0) + CALL_MCOUNT 1 + dcbt 0,r3 + clrrdi r4,r3,3 /* Align the address to doubleword boundary. */ + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + ld r12,0(r4) /* Load doubleword from memory. */ +#ifdef __LITTLE_ENDIAN__ + sld r5,r5,r6 +#else + srd r5,r5,r6 /* MASK = MASK >> padding. */ +#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + /* For shorter strings (< 64 bytes), we will not use vector registers, + as the overhead isn't worth it. So, let's use GPRs instead. This + will be done the same way as we do in the POWER7 implementation. + Let's see if we are aligned to a quadword boundary. If so, we can + jump to the first (non-vectorized) loop. Otherwise, we have to + handle the next DWORD first. */ + mtcrf 0x01,r4 + mr r9,r4 + addi r9,r9,8 + bt 28,L(align64) + + /* Handle the next 8 bytes so we are aligned to a quadword + boundary. */ + ldu r5,8(r4) + cmpb r10,r5,r0 + cmpdi cr7,r10,0 + addi r9,r9,8 + bne cr7,L(done) + +L(align64): + /* Proceed to the old (POWER7) implementation, checking two doublewords + per iteraction. For the first 56 bytes, we will just check for null + characters. After that, we will also check if we are 64-byte aligned + so we can jump to the vectorized implementation. We will unroll + these loops to avoid excessive branching. 
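+
+	   One unrolled step corresponds roughly to this illustrative C
+	   sketch (not part of the original source; cmpb is written as a
+	   pseudo-intrinsic returning 0xff in each '\0' byte position):
+
+	     m = cmpb (w1, 0) | cmpb (w2, 0);
+	     p += 16;
+	     if (m != 0)
+	       goto dword_zero;
+	     if (((uintptr_t) p & 63) == 0)
+	       goto preloop;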
*/ + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + /* Are we 64-byte aligned? If so, jump to the vectorized loop. + Note: aligning to 64-byte will necessarily slow down performance for + strings around 64 bytes in length due to the extra comparisons + required to check alignment for the vectorized loop. This is a + necessary tradeoff we are willing to take in order to speed up the + calculation for larger strings. */ + andi. r10,r9,63 + beq cr0,L(preloop) + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + andi. r10,r9,63 + beq cr0,L(preloop) + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + andi. r10,r9,63 + beq cr0,L(preloop) + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + andi. r10,r9,63 + beq cr0,L(preloop) + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + + /* At this point, we are necessarily 64-byte aligned. If no zeroes were + found, jump to the vectorized loop. */ + beq cr7,L(preloop) + +L(dword_zero): + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r10,0 + addi r4,r4,-8 + bne cr6,L(done) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r11 + addi r4,r4,8 + + /* If the null byte was found in the non-vectorized code, compute the + final length. r10 has the output of the cmpb instruction, that is, + it contains 0xff in the same position as the null byte in the + original doubleword from the string. Use that to calculate the + length. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r9, r10,-1 /* Form a mask from trailing zeros. */ + andc r9, r9,r10 + popcntd r0, r9 /* Count the bits in the mask. */ +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r4 + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r5,r0 /* Compute final length. */ + blr + + /* Vectorized implementation starts here. */ + .p2align 4 +L(preloop): + /* Set up for the loop. */ + mr r4,r9 + li r7, 16 /* Load required offsets. */ + li r8, 32 + li r9, 48 + li r12, 8 + vxor v0,v0,v0 /* VR with null chars to use with + vcmpequb. */ + + /* Main loop to look for the end of the string. We will read in + 64-byte chunks. Align it to 32 bytes and unroll it 3 times to + leverage the icache performance. */ + .p2align 5 +L(loop): + lvx v1,r4,r0 /* Load 4 quadwords. */ + lvx v2,r4,r7 + lvx v3,r4,r8 + lvx v4,r4,r9 + vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ + vminub v6,v3,v4 + vminub v7,v5,v6 + vcmpequb. v7,v7,v0 /* Check for NULLs. */ + addi r4,r4,64 /* Adjust address for the next iteration. */ + bne cr6,L(vmx_zero) + + lvx v1,r4,r0 /* Load 4 quadwords. 
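			   The vminub/vcmpequb. pair below then reduces
			   them in one step; as an illustrative sketch
			   (not part of the original source):

			     v7 = min (min (v1, v2), min (v3, v4));
			     if (any_byte_is_zero (v7))
			       goto vmx_zero;

			   since the byte-wise minimum is zero exactly
			   when some input byte is zero.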
*/ + lvx v2,r4,r7 + lvx v3,r4,r8 + lvx v4,r4,r9 + vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ + vminub v6,v3,v4 + vminub v7,v5,v6 + vcmpequb. v7,v7,v0 /* Check for NULLs. */ + addi r4,r4,64 /* Adjust address for the next iteration. */ + bne cr6,L(vmx_zero) + + lvx v1,r4,r0 /* Load 4 quadwords. */ + lvx v2,r4,r7 + lvx v3,r4,r8 + lvx v4,r4,r9 + vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ + vminub v6,v3,v4 + vminub v7,v5,v6 + vcmpequb. v7,v7,v0 /* Check for NULLs. */ + addi r4,r4,64 /* Adjust address for the next iteration. */ + beq cr6,L(loop) + +L(vmx_zero): + /* OK, we found a null byte. Let's look for it in the current 64-byte + block and mark it in its corresponding VR. */ + vcmpequb v1,v1,v0 + vcmpequb v2,v2,v0 + vcmpequb v3,v3,v0 + vcmpequb v4,v4,v0 + + /* We will now 'compress' the result into a single doubleword, so it + can be moved to a GPR for the final calculation. First, we + generate an appropriate mask for vbpermq, so we can permute bits into + the first halfword. */ + vspltisb v10,3 + lvsl v11,r0,r0 + vslb v10,v11,v10 + + /* Permute the first bit of each byte into bits 48-63. */ + VBPERMQ(v1,v1,v10) + VBPERMQ(v2,v2,v10) + VBPERMQ(v3,v3,v10) + VBPERMQ(v4,v4,v10) + + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v2,v2,v2,2 + vsldoi v3,v3,v3,4 + vsldoi v4,v4,v4,6 +#else + vsldoi v1,v1,v1,6 + vsldoi v2,v2,v2,4 + vsldoi v3,v3,v3,2 +#endif + + /* Merge the results and move to a GPR. */ + vor v1,v2,v1 + vor v2,v3,v4 + vor v4,v1,v2 + MFVRD(r10,v4) + + /* Adjust address to the begninning of the current 64-byte block. */ + addi r4,r4,-64 + +#ifdef __LITTLE_ENDIAN__ + addi r9, r10,-1 /* Form a mask from trailing zeros. */ + andc r9, r9,r10 + popcntd r0, r9 /* Count the bits in the mask. */ +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r4 + add r3,r5,r0 /* Compute final length. */ + blr + +END (STRLEN) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncase.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncase.S new file mode 100644 index 0000000000..32e09e4d94 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncase.S @@ -0,0 +1,20 @@ +/* Optimized strncasecmp implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STRNCASECMP 1 +#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncmp.S new file mode 100644 index 0000000000..3d8df90538 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncmp.S @@ -0,0 +1,327 @@ +/* Optimized strncmp implementation for PowerPC64/POWER8. 
+   Copyright (C) 2015-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#ifndef STRNCMP
+# define STRNCMP strncmp
+#endif
+
+/* Implements the function
+
+   int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n)
+
+   The implementation uses unaligned doubleword access to avoid specialized
+   code paths depending on data alignment.  Although recent powerpc64 uses
+   64K as default, the page cross handling assumes minimum page size of
+   4k.  */
+
+	.machine  power7
+EALIGN (STRNCMP, 4, 0)
+	/* Check if size is 0.  */
+	mr.	r10,r5
+	beq	cr0,L(ret0)
+
+	/* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using
+	   the code:
+
+	    (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE))
+
+	   with PAGE_SIZE being 4096 and ITER_SIZE being 16.  */
+	rldicl	r8,r3,0,52
+	cmpldi	cr7,r8,4096-16
+	bgt	cr7,L(pagecross)
+	rldicl	r9,r4,0,52
+	cmpldi	cr7,r9,4096-16
+	bgt	cr7,L(pagecross)
+
+	/* For short strings of up to 16 bytes, load both s1 and s2 using
+	   unaligned dwords and compare.  */
+	ld	r7,0(r3)
+	ld	r9,0(r4)
+	li	r8,0
+	cmpb	r8,r7,r8
+	cmpb	r6,r7,r9
+	orc.	r8,r8,r6
+	bne	cr0,L(different1)
+
+	/* If the strings compared are equal, but size is less than or
+	   equal to 8, return 0.  */
+	cmpldi	cr7,r10,8
+	li	r9,0
+	ble	cr7,L(ret1)
+	addi	r5,r10,-8
+
+	ld	r7,8(r3)
+	ld	r9,8(r4)
+	cmpb	r8,r7,r8
+	cmpb	r6,r7,r9
+	orc.	r8,r8,r6
+	bne	cr0,L(different0)
+
+	cmpldi	cr7,r5,8
+	mr	r9,r8
+	ble	cr7,L(ret1)
+
+	/* Update pointers and size.  */
+	addi	r10,r10,-16
+	addi	r3,r3,16
+	addi	r4,r4,16
+
+	/* Now that the first 16 bytes have been checked, align source1
+	   to doubleword and adjust the source2 address.  */
+L(align_8b):
+	rldicl	r5,r3,0,61
+	rldicr	r3,r3,0,60
+	subf	r4,r5,r4
+	add	r10,r10,r5
+
+	/* At this point, source1 alignment is 0 and source2 alignment is
+	   between 0 and 7.  Check if source2 alignment is 0, meaning both
+	   sources have the same alignment.  */
+	andi.	r8,r4,0x7
+	beq	cr0,L(loop_eq_align_0)
+
+	li	r5,0
+	b	L(loop_ne_align_1)
+
+	/* If source2 is unaligned to doubleword, the code needs to check
+	   on each iteration if the unaligned doubleword access will cross
+	   a 4k page boundary.  */
+	.align 4
+L(loop_ne_align_0):
+	ld	r7,0(r3)
+	ld	r9,0(r4)
+	cmpb	r8,r7,r5
+	cmpb	r6,r7,r9
+	orc.	r8,r8,r6
+	bne	cr0,L(different1)
+
+	cmpldi	cr7,r10,8
+	ble	cr7,L(ret0)
+	addi	r10,r10,-8
+	addi	r3,r3,8
+	addi	r4,r4,8
+L(loop_ne_align_1):
+	rldicl	r9,r4,0,52
+	cmpldi	cr7,r9,4088
+	ble	cr7,L(loop_ne_align_0)
+	cmpdi	cr7,r10,0
+	beq	cr7,L(ret0)
+
+	lbz	r9,0(r3)
+	lbz	r8,0(r4)
+	cmplw	cr7,r9,r8
+	bne	cr7,L(byte_ne_4)
+	cmpdi	cr7,r9,0
+	beq	cr7,L(size_reached_0)
+
+	li	r9,7
+	addi	r8,r3,1
+	mtctr	r9
+	addi	r4,r4,1
+	addi	r10,r10,-1
+	addi	r3,r3,8
+
+	/* The unaligned read of source2 will cross a 4K page boundary,
+	   and the different byte or NULL may be in the remaining page
+	   bytes.  Since it cannot use an unaligned load, the algorithm
+	   reads and compares 8 bytes to keep source1 doubleword aligned.  */
+	.align 4
+L(loop_ne_align_byte):
+	cmpdi	cr7,r10,0
+	addi	r10,r10,-1
+	beq	cr7,L(ret0)
+	lbz	r9,0(r8)
+	lbz	r7,0(r4)
+	addi	r8,r8,1
+	addi	r4,r4,1
+	cmplw	cr7,r9,r7
+	cmpdi	cr5,r9,0
+	bne	cr7,L(size_reached_2)
+	beq	cr5,L(size_reached_0)
+	bdnz	L(loop_ne_align_byte)
+
+	cmpdi	cr7,r10,0
+	bne+	cr7,L(loop_ne_align_0)
+
+	.align 4
+L(ret0):
+	li	r9,0
+L(ret1):
+	mr	r3,r9
+	blr
+
+	/* The code now checks if r8 and r10 differ by issuing a cmpb
+	   and shifting the result based on its output:
+
+	  #ifdef __LITTLE_ENDIAN__
+	    leadzero = (__builtin_ffsl (z1) - 1);
+	    leadzero = leadzero > (n-1)*8 ? (n-1)*8 : leadzero;
+	    r1 = (r1 >> leadzero) & 0xFFUL;
+	    r2 = (r2 >> leadzero) & 0xFFUL;
+	  #else
+	    leadzero = __builtin_clzl (z1);
+	    leadzero = leadzero > (n-1)*8 ? (n-1)*8 : leadzero;
+	    r1 = (r1 >> (56 - leadzero)) & 0xFFUL;
+	    r2 = (r2 >> (56 - leadzero)) & 0xFFUL;
+	  #endif
+	  return r1 - r2;  */
+
+	.align 4
+L(different0):
+	mr	r10,r5
+#ifdef __LITTLE_ENDIAN__
+L(different1):
+	neg	r11,r8
+	sldi	r10,r10,3
+	and	r8,r11,r8
+	addi	r10,r10,-8
+	cntlzd	r8,r8
+	subfic	r8,r8,63
+	extsw	r8,r8
+	cmpld	cr7,r8,r10
+	ble	cr7,L(different2)
+	mr	r8,r10
+L(different2):
+	extsw	r8,r8
+#else
+L(different1):
+	addi	r10,r10,-1
+	cntlzd	r8,r8
+	sldi	r10,r10,3
+	cmpld	cr7,r8,r10
+	blt	cr7,L(different2)
+	mr	r8,r10
+L(different2):
+	subfic	r8,r8,56
+#endif
+	srd	r7,r7,r8
+	srd	r9,r9,r8
+	rldicl	r3,r7,0,56
+	rldicl	r9,r9,0,56
+	subf	r9,r9,r3
+	extsw	r9,r9
+	mr	r3,r9
+	blr
+
+	/* If the unaligned 16-byte read crosses a 4K page boundary, a
+	   simple byte-by-byte comparison is used until the page alignment
+	   for s1 is reached.  */
+	.align 4
+L(pagecross):
+	lbz	r7,0(r3)
+	lbz	r9,0(r4)
+	subfic	r8,r8,4095
+	cmplw	cr7,r9,r7
+	bne	cr7,L(byte_ne_3)
+	cmpdi	cr7,r9,0
+	beq	cr7,L(byte_ne_0)
+	addi	r10,r10,-1
+	subf	r7,r8,r10
+	subf	r9,r7,r10
+	addi	r9,r9,1
+	mtctr	r9
+	b	L(pagecross_loop1)
+
+	.align 4
+L(pagecross_loop0):
+	beq	cr7,L(ret0)
+	lbz	r9,0(r3)
+	lbz	r8,0(r4)
+	addi	r10,r10,-1
+	cmplw	cr7,r9,r8
+	cmpdi	cr5,r9,0
+	bne	cr7,L(byte_ne_2)
+	beq	cr5,L(byte_ne_0)
+L(pagecross_loop1):
+	cmpdi	cr7,r10,0
+	addi	r3,r3,1
+	addi	r4,r4,1
+	bdnz	L(pagecross_loop0)
+	cmpdi	cr7,r7,0
+	li	r9,0
+	bne+	cr7,L(align_8b)
+	b	L(ret1)
+
+	/* If both source1 and source2 are doubleword aligned, there is no
+	   need for page boundary cross checks.  */
+	.align 4
+L(loop_eq_align_0):
+	ld	r7,0(r3)
+	ld	r9,0(r4)
+	cmpb	r8,r7,r8
+	cmpb	r6,r7,r9
+	orc.	r8,r8,r6
+	bne	cr0,L(different1)
+
+	cmpldi	cr7,r10,8
+	ble	cr7,L(ret0)
+	addi	r9,r10,-9
+
+	li	r5,0
+	srdi	r9,r9,3
+	addi	r9,r9,1
+	mtctr	r9
+	b	L(loop_eq_align_2)
+
+	.align 4
+L(loop_eq_align_1):
+	bdz	L(ret0)
+L(loop_eq_align_2):
+	ldu	r7,8(r3)
+	addi	r10,r10,-8
+	ldu	r9,8(r4)
+	cmpb	r8,r7,r5
+	cmpb	r6,r7,r9
+	orc.
r8,r8,r6 + beq cr0,L(loop_eq_align_1) + b L(different1) + + .align 4 +L(byte_ne_0): + li r7,0 +L(byte_ne_1): + subf r9,r9,r7 + extsw r9,r9 + b L(ret1) + + .align 4 +L(byte_ne_2): + extsw r7,r9 + mr r9,r8 + b L(byte_ne_1) +L(size_reached_0): + li r10,0 +L(size_reached_1): + subf r9,r9,r10 + extsw r9,r9 + b L(ret1) +L(size_reached_2): + extsw r10,r9 + mr r9,r7 + b L(size_reached_1) +L(byte_ne_3): + extsw r7,r7 + b L(byte_ne_1) +L(byte_ne_4): + extsw r10,r9 + mr r9,r8 + b L(size_reached_1) +END(STRNCMP) +libc_hidden_builtin_def(strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncpy.S new file mode 100644 index 0000000000..6d40f30ff7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncpy.S @@ -0,0 +1,465 @@ +/* Optimized strncpy/stpncpy implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifdef USE_AS_STPNCPY +# ifndef STPNCPY +# define FUNC_NAME __stpncpy +# else +# define FUNC_NAME STPNCPY +# endif +#else +# ifndef STRNCPY +# define FUNC_NAME strncpy +# else +# define FUNC_NAME STRNCPY +# endif +#endif /* !USE_AS_STPNCPY */ + +#ifndef MEMSET +/* For builds without IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define MEMSET __GI_memset +# else +# define MEMSET memset +# endif +#endif + +#define FRAMESIZE (FRAME_MIN_SIZE+48) + +/* Implements the function + + char * [r3] strncpy (char *dest [r3], const char *src [r4], size_t n [r5]) + + or + + char * [r3] stpncpy (char *dest [r3], const char *src [r4], size_t n [r5]) + + if USE_AS_STPCPY is defined. + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment. Although recent powerpc64 uses + 64K as default, the page cross handling assumes minimum page size of + 4k. */ + + .machine power7 +EALIGN (FUNC_NAME, 4, 0) + + /* Check if the [src]+15 will cross a 4K page by checking if the bit + indicating the page size changes. Basically: + + uint64_t srcin = (uint64_t)src; + uint64_t ob = srcin & 4096UL; + uint64_t nb = (srcin+15UL) & 4096UL; + if (ob ^ nb) + goto pagecross; */ + + addi r10,r4,16 + rlwinm r9,r4,0,19,19 + + /* Save some non-volatile registers on the stack. */ + std r26,-48(r1) + std r27,-40(r1) + + rlwinm r8,r10,0,19,19 + + std r28,-32(r1) + std r29,-24(r1) + + cmpld cr7,r9,r8 + + std r30,-16(r1) + std r31,-8(r1) + + /* Update CFI. 
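	   Each cfi_offset annotation that follows records, for the stack
	   unwinder, the offset from the CFA at which the corresponding
	   non-volatile register was saved by the stores above.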
*/ + cfi_offset(r26, -48) + cfi_offset(r27, -40) + cfi_offset(r28, -32) + cfi_offset(r29, -24) + cfi_offset(r30, -16) + cfi_offset(r31, -8) + + beq cr7,L(unaligned_lt_16) + rldicl r9,r4,0,61 + subfic r8,r9,8 + cmpld cr7,r5,r8 + bgt cr7,L(pagecross) + + /* At this points there is 1 to 15 bytes to check and write. Since it could + be either from first unaligned 16 bytes access or from bulk copy, the code + uses an unrolled byte read/write instead of trying to analyze the cmpb + results. */ +L(short_path): + mr r9,r3 +L(short_path_1): + /* Return if there are no more bytes to be written. */ + cmpdi cr7,r5,0 + beq cr7,L(short_path_loop_end_1) +L(short_path_2): + /* Copy one char from src (r4) and write it to dest (r9). If it is the + end-of-string, start the null padding. Continue, otherwise. */ + lbz r10,0(r4) + cmpdi cr7,r10,0 + stb r10,0(r9) + beq cr7,L(zero_pad_start_1) + /* If there are no more bytes to be written, return. */ + cmpdi cr0,r5,1 + addi r8,r9,1 + addi r6,r5,-1 + beq cr0,L(short_path_loop_end_0) + /* Copy another char from src (r4) to dest (r9). Check again if it is + the end-of-string. If so, start the null padding. */ + lbz r10,1(r4) + cmpdi cr7,r10,0 + stb r10,1(r9) + beq cr7,L(zero_pad_start_prepare_1) + /* Eagerly decrement r5 by 3, which is the number of bytes already + written, plus one write that will be performed later on. */ + addi r10,r5,-3 + b L(short_path_loop_1) + + .align 4 +L(short_path_loop): + /* At this point, the induction variable, r5, as well as the pointers + to dest and src (r9 and r4, respectivelly) have been updated. + + Note: The registers r7 and r10 are induction variables derived from + r5. They are used to determine if the total number of writes has + been reached at every other write. + + Copy one char from src (r4) and write it to dest (r9). If it is the + end-of-string, start the null padding. Continue, otherwise. */ + lbz r8,0(r4) + addi r7,r10,-2 + cmpdi cr5,r8,0 + stb r8,0(r9) + beq cr5,L(zero_pad_start_1) + beq cr7,L(short_path_loop_end_0) + /* Copy another char from src (r4) to dest (r9). Check again if it is + the end-of-string. If so, start the null padding. */ + lbz r8,1(r4) + cmpdi cr7,r8,0 + stb r8,1(r9) + beq cr7,L(zero_pad_start) + mr r10,r7 +L(short_path_loop_1): + /* This block is reached after two chars have been already written to + dest. Nevertheless, r5 (the induction variable), r9 (the pointer to + dest), and r4 (the pointer to src) have not yet been updated. + + At this point: + r5 holds the count of bytes yet to be written plus 2. + r9 points to the last two chars that were already written to dest. + r4 points to the last two chars that were already copied from src. + + The algorithm continues by decrementing r5, the induction variable, + so that it reflects the last two writes. The pointers to dest (r9) + and to src (r4) are increment by two, for the same reason. + + Note: Register r10 is another induction variable, derived from r5, + which determines if the total number of writes has been reached. */ + addic. r5,r5,-2 + addi r9,r9,2 + cmpdi cr7,r10,0 /* Eagerly check if the next write is the last. */ + addi r4,r4,2 + addi r6,r9,1 + bne cr0,L(short_path_loop) /* Check if the total number of writes + has been reached at every other + write. 
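			   Overall, this unrolled loop matches the usual
			   C formulation of strncpy's copy phase (an
			   illustrative sketch, not part of the original
			   source):

			     do
			       {
				 c = *src++;
				 *dst++ = c;
				 if (c == '\0')
				   goto zero_pad;
			       }
			     while (--n != 0);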
*/ +#ifdef USE_AS_STPNCPY + mr r3,r9 + b L(short_path_loop_end) +#endif + +L(short_path_loop_end_0): +#ifdef USE_AS_STPNCPY + addi r3,r9,1 + b L(short_path_loop_end) +#endif +L(short_path_loop_end_1): +#ifdef USE_AS_STPNCPY + mr r3,r9 +#endif +L(short_path_loop_end): + /* Restore non-volatile registers. */ + ld r26,-48(r1) + ld r27,-40(r1) + ld r28,-32(r1) + ld r29,-24(r1) + ld r30,-16(r1) + ld r31,-8(r1) + blr + + /* This code pads the remainder of dest with NULL bytes. The algorithm + calculates the remaining size and calls memset. */ + .align 4 +L(zero_pad_start): + mr r5,r10 + mr r9,r6 +L(zero_pad_start_1): + /* At this point: + - r5 holds the number of bytes that still have to be written to + dest. + - r9 points to the position, in dest, where the first null byte + will be written. + The above statements are true both when control reaches this label + from a branch or when falling through the previous lines. */ +#ifndef USE_AS_STPNCPY + mr r30,r3 /* Save the return value of strncpy. */ +#endif + /* Prepare the call to memset. */ + mr r3,r9 /* Pointer to the area to be zero-filled. */ + li r4,0 /* Byte to be written (zero). */ + + /* We delayed the creation of the stack frame, as well as the saving of + the link register, because only at this point, we are sure that + doing so is actually needed. */ + + /* Save the link register. */ + mflr r0 + std r0,16(r1) + cfi_offset(lr, 16) + + /* Create the stack frame. */ + stdu r1,-FRAMESIZE(r1) + cfi_adjust_cfa_offset(FRAMESIZE) + + bl MEMSET + nop + + /* Restore the stack frame. */ + addi r1,r1,FRAMESIZE + cfi_adjust_cfa_offset(-FRAMESIZE) + /* Restore the link register. */ + ld r0,16(r1) + mtlr r0 + +#ifndef USE_AS_STPNCPY + mr r3,r30 /* Restore the return value of strncpy, i.e.: + dest. For stpncpy, the return value is the + same as return value of memset. */ +#endif + + /* Restore non-volatile registers and return. */ + ld r26,-48(r1) + ld r27,-40(r1) + ld r28,-32(r1) + ld r29,-24(r1) + ld r30,-16(r1) + ld r31,-8(r1) + blr + + /* The common case where [src]+16 will not cross a 4K page boundary. + In this case the code fast check the first 16 bytes by using doubleword + read/compares and update destiny if neither total size or null byte + is found in destiny. */ + .align 4 +L(unaligned_lt_16): + cmpldi cr7,r5,7 + ble cr7,L(short_path) + ld r7,0(r4) + li r8,0 + cmpb r8,r7,r8 + cmpdi cr7,r8,0 + bne cr7,L(short_path_prepare_2) + addi r6,r5,-8 + std r7,0(r3) + addi r9,r3,8 + cmpldi cr7,r6,7 + addi r7,r4,8 + ble cr7,L(short_path_prepare_1_1) + ld r4,8(r4) + cmpb r8,r4,r8 + cmpdi cr7,r8,0 + bne cr7,L(short_path_prepare_2_1) + std r4,8(r3) + addi r29,r3,16 + addi r5,r5,-16 + /* Neither the null byte was found or total length was reached, + align to 16 bytes and issue a bulk copy/compare. */ + b L(align_to_16b) + + /* In the case of 4k page boundary cross, the algorithm first align + the address to a doubleword, calculate a mask based on alignment + to ignore the bytes and continue using doubleword. */ + .align 4 +L(pagecross): + rldicr r11,r4,0,59 /* Align the address to 8 bytes boundary. */ + li r6,-1 /* MASK = 0xffffffffffffffffUL. */ + sldi r9,r9,3 /* Calculate padding. */ + ld r7,0(r11) /* Load doubleword from memory. */ +#ifdef __LITTLE_ENDIAN__ + sld r9,r6,r9 /* MASK = MASK << padding. */ +#else + srd r9,r6,r9 /* MASK = MASK >> padding. */ +#endif + orc r9,r7,r9 /* Mask bits that are not part of the + string. */ + li r7,0 + cmpb r9,r9,r7 /* Check for null bytes in DWORD1. 
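			   (cmpb sets each result byte to 0xff where the
			   corresponding bytes of its two sources are
			   equal and to 0x00 where they differ, so a
			   nonzero r9 here means the masked doubleword
			   contains a '\0' byte.)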
*/ + cmpdi cr7,r9,0 + bne cr7,L(short_path_prepare_2) + subf r8,r8,r5 /* Adjust total length. */ + cmpldi cr7,r8,8 /* Check if length was reached. */ + ble cr7,L(short_path_prepare_2) + + /* For next checks we have aligned address, so we check for more + three doublewords to make sure we can read 16 unaligned bytes + to start the bulk copy with 16 aligned addresses. */ + ld r7,8(r11) + cmpb r9,r7,r9 + cmpdi cr7,r9,0 + bne cr7,L(short_path_prepare_2) + addi r7,r8,-8 + cmpldi cr7,r7,8 + ble cr7,L(short_path_prepare_2) + ld r7,16(r11) + cmpb r9,r7,r9 + cmpdi cr7,r9,0 + bne cr7,L(short_path_prepare_2) + addi r8,r8,-16 + cmpldi cr7,r8,8 + ble cr7,L(short_path_prepare_2) + ld r8,24(r11) + cmpb r9,r8,r9 + cmpdi cr7,r9,0 + bne cr7,L(short_path_prepare_2) + + /* No null byte found in the 32 bytes readed and length not reached, + read source again using unaligned loads and store them. */ + ld r9,0(r4) + addi r29,r3,16 + addi r5,r5,-16 + std r9,0(r3) + ld r9,8(r4) + std r9,8(r3) + + /* Align source to 16 bytes and adjust destiny and size. */ +L(align_to_16b): + rldicl r9,r10,0,60 + rldicr r28,r10,0,59 + add r12,r5,r9 + subf r29,r9,r29 + + /* The bulk read/compare/copy loads two doublewords, compare and merge + in a single register for speed. This is an attempt to speed up the + null-checking process for bigger strings. */ + + cmpldi cr7,r12,15 + ble cr7,L(short_path_prepare_1_2) + + /* Main loop for large sizes, unrolled 2 times to get better use of + pipeline. */ + ld r8,0(28) + ld r10,8(28) + li r9,0 + cmpb r7,r8,r9 + cmpb r9,r10,r9 + or. r6,r9,r7 + bne cr0,L(short_path_prepare_2_3) + addi r5,r12,-16 + addi r4,r28,16 + std r8,0(r29) + std r10,8(r29) + cmpldi cr7,r5,15 + addi r9,r29,16 + ble cr7,L(short_path_1) + mr r11,r28 + mr r6,r29 + li r30,0 + subfic r26,r4,48 + subfic r27,r9,48 + + b L(loop_16b) + + .align 4 +L(loop_start): + ld r31,0(r11) + ld r10,8(r11) + cmpb r0,r31,r7 + cmpb r8,r10,r7 + or. r7,r0,r8 + addi r5,r5,-32 + cmpldi cr7,r5,15 + add r4,r4,r26 + add r9,r9,r27 + bne cr0,L(short_path_prepare_2_2) + add r4,r28,r4 + std r31,0(r6) + add r9,r29,r9 + std r10,8(r6) + ble cr7,L(short_path_1) + +L(loop_16b): + ld r10,16(r11) + ld r0,24(r11) + cmpb r8,r10,r30 + cmpb r7,r0,r30 + or. r7,r8,r7 + addi r12,r12,-32 + cmpldi cr7,r12,15 + addi r11,r11,32 + bne cr0,L(short_path_2) + std r10,16(r6) + addi r6,r6,32 + std r0,-8(r6) + bgt cr7,L(loop_start) + + mr r5,r12 + mr r4,r11 + mr r9,r6 + b L(short_path_1) + + .align 4 +L(short_path_prepare_1_1): + mr r5,r6 + mr r4,r7 + b L(short_path_1) +L(short_path_prepare_1_2): + mr r5,r12 + mr r4,r28 + mr r9,r29 + b L(short_path_1) +L(short_path_prepare_2): + mr r9,r3 + b L(short_path_2) +L(short_path_prepare_2_1): + mr r5,r6 + mr r4,r7 + b L(short_path_2) +L(short_path_prepare_2_2): + mr r5,r12 + mr r4,r11 + mr r9,r6 + b L(short_path_2) +L(short_path_prepare_2_3): + mr r5,r12 + mr r4,r28 + mr r9,r29 + b L(short_path_2) +L(zero_pad_start_prepare_1): + mr r5,r6 + mr r9,r8 + b L(zero_pad_start_1) +END (FUNC_NAME) + +#ifndef USE_AS_STPNCPY +libc_hidden_builtin_def (strncpy) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strnlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strnlen.S new file mode 100644 index 0000000000..3eadbfb09e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strnlen.S @@ -0,0 +1,433 @@ +/* Optimized strnlen implementation for POWER8 using a vmx loop. + + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* It is implemented the following heuristic: + 1. Case maxlen <= 32: align the pointer to 8 bytes to loop through + reading doublewords. Uses the POWER7 algorithm. + 2. Case maxlen > 32: check for null bytes in the first 16 bytes using + unaligned accesses. Return length if found. Otherwise: + 2.1 Case maxlen < 64: deduct the bytes previously read, align + the pointer to 16 bytes and loop through reading quadwords + until find null bytes or reach maxlen. + 2.2 Case maxlen > 64: deduct the bytes previously read, align + the pointer to 64 bytes and set up a counter to loop through + reading in strides of 64 bytes. In case it finished the loop + with null bytes not found, process the remainder bytes by + switching to the loop to heuristic in 2.1. */ + +#include <sysdep.h> + +/* Define default page size to 4KB. */ +#define PAGE_SIZE 4096 + +/* The following macros implement Power ISA v2.07 opcodes + that could not be used directly into this code to the keep + compatibility with older binutils versions. */ + +/* Move from vector register doubleword. */ +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) + +/* Move to vector register doubleword. */ +#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) + +/* Vector Bit Permute Quadword. */ +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +/* Vector Population Count Halfword. */ +#define VPOPCNTH(t,b) .long (0x10000743 | ((t)<<(32-11)) | ((b)<<(32-21))) + +/* Vector Count Leading Zeros Halfword. */ +#define VCLZH(t,b) .long (0x10000742 | ((t)<<(32-11)) | ((b)<<(32-21))) + + +/* int [r3] strnlen (char *s [r3], size_t maxlen [r4]) */ +/* TODO: change to power8 when minimum required binutils allows it. */ + .machine power7 +ENTRY (__strnlen) + CALL_MCOUNT 2 + dcbt 0,r3 + + cmpldi r4,32 /* Check if maxlen <= 32. */ + ble L(small_range) /* If maxlen <= 32. */ + + /* Upcoming 16 bytes unaligned accesses cannot cross the page boundary + otherwise the processor throws an memory access error. + Use following code to check there is room for such as accesses: + (((size_t) s) % PAGE_SIZE > (PAGE_SIZE - 16) + If it is disallowed then switch to the code that handles + the string when maxlen <= 32. */ + clrldi r10,r3,52 + cmpldi cr7,r10,PAGE_SIZE-16 + bgt cr7,L(small_range) /* If less than 16B of page end. */ + + /* Compute our permute constant r8. */ + li r7,0 + /* Compute a bpermd constant to move bit 0 of each word into + a halfword value, and count trailing zeros. */ +#ifdef __LITTLE_ENDIAN__ + li r8,0x2820 + oris r8,r8,0x3830 + sldi r8,r8,32 + ori r8,r8,0x0800 + oris r8,r8,0x1810 +#else + li r8,0x1018 + oris r8,r8,0x0008 + sldi r8,r8,32 + ori r8,r8,0x3038 + oris r8,r8,0x2028 +#endif + + /* maxlen > 32. 
+	/* maxlen > 32.  Optimistically check for null bytes in the first
+	   16 bytes of the string using unaligned accesses.  */
+	ld	r5,0(r3)
+	ld	r6,8(r3)
+	cmpb	r10,r7,r5	/* Check for null bytes in DWORD1.  */
+	cmpb	r11,r7,r6	/* Check for null bytes in DWORD2.  */
+	or.	r7,r10,r11
+	bne	cr0,L(early_find)	/* If found null bytes.  */
+
+	/* At this point maxlen > 32 and null bytes were not found in the
+	   first 16 bytes.  Prepare for loop using VMX.  */
+
+	/* r3 == s, r4 == maxlen.  All other volatile regs are unused now.  */
+
+	addi	r5,r3,16	/* Align up, or just add the 16B we
+				   already checked.  */
+	li	r0,15
+	and	r7,r5,r0	/* Find offset into 16B alignment.  */
+	andc	r5,r5,r0	/* Quadword align up s to the next quadword.  */
+	li	r0,16
+	subf	r0,r7,r0
+	subf	r4,r0,r4	/* Deduct unaligned bytes from maxlen.  */
+
+
+	/* Compute offsets for vmx loads, and precompute the vbpermq
+	   constants for both the 64B and 16B loops.  */
+	li	r6,0
+	vspltisb	v0,0
+	vspltisb	v10,3
+	lvsl	v11,r6,r6
+	vslb	v10,v11,v10
+
+	cmpldi	r4,64		/* Check maxlen < 64.  */
+	blt	L(smaller)	/* If maxlen < 64.  */
+
+	/* To enter the 64B loop, the pointer must be 64-byte aligned, so
+	   read quadwords until it is aligned or null bytes are found.  In
+	   the worst case it becomes aligned after the fourth iteration,
+	   so unroll the loop to avoid a counter check.  */
+	andi.	r7,r5,63	/* Check if it is 64-byte aligned.  */
+	beq	cr0,L(preloop_64B)	/* If it is already 64B aligned.  */
+	lvx	v1,r5,r6
+	vcmpequb.	v1,v1,v0
+	addi	r5,r5,16
+	addi	r4,r4,-16	/* Decrement maxlen by 16 bytes.  */
+	bne	cr6,L(found_aligning64B)	/* If found null bytes.  */
+
+	/* Unroll the above block 3x until aligned or null bytes found.  */
+	andi.	r7,r5,63
+	beq	cr0,L(preloop_64B)
+	lvx	v1,r5,r6
+	vcmpequb.	v1,v1,v0
+	addi	r5,r5,16
+	addi	r4,r4,-16
+	bne	cr6,L(found_aligning64B)
+
+	andi.	r7,r5,63
+	beq	cr0,L(preloop_64B)
+	lvx	v1,r5,r6
+	vcmpequb.	v1,v1,v0
+	addi	r5,r5,16
+	addi	r4,r4,-16
+	bne	cr6,L(found_aligning64B)
+
+	andi.	r7,r5,63
+	beq	cr0,L(preloop_64B)
+	lvx	v1,r5,r6
+	vcmpequb.	v1,v1,v0
+	addi	r5,r5,16
+	addi	r4,r4,-16
+	bne	cr6,L(found_aligning64B)
+
+	/* At this point the pointer is 64-byte aligned.
+	   Prepare for the 64B loop.  */
+	.p2align 4
+L(preloop_64B):
+	/* Check if maxlen became less than 64, which disallows the 64B
+	   loop.  If so, switch to the 16B loop code.  */
+	cmpldi	r4,64		/* Check if maxlen < 64.  */
+	blt	L(smaller)	/* If maxlen < 64.  */
+	/* Set some constant values.  */
+	li	r7,16
+	li	r10,32
+	li	r9,48
+
+	/* Compute the number of 64 bytes iterations needed.  */
+	srdi	r11,r4,6	/* Compute loop count (maxlen / 64).  */
+	andi.	r4,r4,63	/* Set maxlen to the remainder (maxlen % 64).  */
+	mtctr	r11		/* Move loop count to counter register.  */
+
+	/* Handle maxlen >= 64.  Loop over the bytes in strides of 64B.  */
+	.p2align 4
+L(loop_64B):
+	lvx	v1,r5,r6	/* r5 is the pointer to s.  */
+	lvx	v2,r5,r7
+	lvx	v3,r5,r10
+	lvx	v4,r5,r9
+	/* Compare the four 16B vectors to obtain the least 16 values.
+	   Any null byte will show up in v7; then check for null bytes.  */
+	vminub	v5,v1,v2
+	vminub	v6,v3,v4
+	vminub	v7,v5,v6
+	vcmpequb.	v7,v7,v0	/* Check for null bytes.  */
+	addi	r5,r5,64	/* Advance pointer to the next iteration.  */
+	bne	cr6,L(found_64B)	/* If found null bytes.  */
+	bdnz	L(loop_64B)	/* Continue the loop if count > 0.  */
+
+/* Hit the loop end without a null match, so branch to handle the
+   remainder.  */
+
+	/* Prepare a 16B loop to handle two cases:
+	   1. If 32 < maxlen < 64.
+	   2. If maxlen >= 64 and the end of the 64B loop was reached
+	      with null bytes not found.  The remainder bytes are
+	      handled here.  */
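+
+/* A hedged scalar model of the 64B stride above: reducing four
+   quadwords with vminub means a single compare against zero detects a
+   null byte anywhere in the block, because min(a,b) has a zero byte
+   exactly when a or b does.  Illustrative C only, the helper name is
+   hypothetical:
+
+       // One 64-byte iteration of the search, scalar stand-in.
+       static inline int block64_has_nul (const unsigned char *p)
+       {
+         unsigned char m = 0xff;
+         for (int i = 0; i < 64; i++)
+           m = p[i] < m ? p[i] : m;   // vminub across the block
+         return m == 0;               // vcmpequb. against zero
+       }
+
+   The assembly does this 16 bytes at a time in four vector
+   registers.  */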
+	.p2align 4
+L(smaller):
+	cmpldi	r4,0		/* Check if maxlen is zero.  */
+	beq	L(done)		/* If maxlen is zero.  */
+
+	/* Place the rounded up number of qw's to check into a vmx
+	   register, and use some vector tricks to minimize
+	   branching.  */
+	MTVRD(v7,r4)		/* Copy maxlen from GPR to vector register.  */
+	vspltisb	v5,1
+	vspltisb	v6,15
+	vspltb	v2,v7,7
+	vaddubs	v3,v5,v6
+
+#ifdef __LITTLE_ENDIAN__
+	vspltish	v5,1	/* Splat 1 to each halfword.  */
+#endif
+
+	/* Loop in 16B aligned increments now.  */
+	.p2align 4
+L(loop_16B):
+	lvx	v1,r5,r6	/* Load quadword into vector register.  */
+	addi	r5,r5,16	/* Increment address to next 16B block.  */
+	vor	v7,v2,v2	/* Save loop count (v2) into v7.  */
+	vsububs	v2,v2,v3	/* Subtract 16B from count, saturate at 0.  */
+	vminub	v4,v1,v2
+	vcmpequb.	v4,v4,v0	/* Check for null bytes.  */
+	beq	cr6,L(loop_16B)	/* If null bytes not found.  */
+
+	vcmpequb	v1,v1,v0
+	VBPERMQ(v1,v1,v10)
+#ifdef __LITTLE_ENDIAN__
+	vsubuhm	v2,v1,v5	/* Form a mask of trailing zeros.  */
+	vandc	v2,v2,v1
+	VPOPCNTH(v1,v2)		/* Count of trailing zeros, 16 if none.  */
+#else
+	VCLZH(v1,v1)		/* Count the leading zeros, 16 if none.  */
+#endif
+	/* Truncate to maximum allowable offset.  */
+	vcmpgtub	v2,v1,v7	/* Compare and truncate for matches
+					   beyond maxlen.  */
+	vsel	v1,v1,v7,v2	/* 0-16 is now in byte 7.  */
+
+	MFVRD(r0,v1)
+	addi	r5,r5,-16	/* Undo speculative bump.  */
+	extsb	r0,r0		/* Clear whatever gunk is in the high 56b.  */
+	add	r5,r5,r0	/* Add the offset of whatever was found.  */
+L(done):
+	subf	r3,r3,r5	/* Length equals the offset of the matched
+				   null byte minus the pointer to s.  */
+	blr			/* Done.  */
+
+	/* Handle the case of maxlen >= 64 where null bytes were found in
+	   the last block of 64 bytes read.  */
+	.p2align 4
+L(found_64B):
+	/* A zero was found.  Reduce the result.  */
+	vcmpequb	v1,v1,v0
+	vcmpequb	v2,v2,v0
+	vcmpequb	v3,v3,v0
+	vcmpequb	v4,v4,v0
+
+	/* Permute the first bit of each byte into bits 48-63.  */
+	VBPERMQ(v1,v1,v10)
+	VBPERMQ(v2,v2,v10)
+	VBPERMQ(v3,v3,v10)
+	VBPERMQ(v4,v4,v10)
+
+	/* Shift each component into its correct position for merging.  */
+#ifdef __LITTLE_ENDIAN__
+	vsldoi	v2,v2,v2,2
+	vsldoi	v3,v3,v3,4
+	vsldoi	v4,v4,v4,6
+#else
+	vsldoi	v1,v1,v1,6
+	vsldoi	v2,v2,v2,4
+	vsldoi	v3,v3,v3,2
+#endif
+
+	/* Merge the results and move to a GPR.  */
+	vor	v1,v2,v1
+	vor	v2,v3,v4
+	vor	v4,v1,v2
+
+	/* Adjust address to the start of the current 64B block.  */
+	addi	r5,r5,-64
+
+	MFVRD(r10,v4)
+#ifdef __LITTLE_ENDIAN__
+	addi	r9,r10,-1	/* Form a mask from trailing zeros.  */
+	andc	r9,r9,r10
+	popcntd	r0,r9		/* Count the bits in the mask.  */
+#else
+	cntlzd	r0,r10		/* Count leading zeros before the match.  */
+#endif
+	subf	r5,r3,r5
+	add	r3,r5,r0	/* Compute final length.  */
+	blr			/* Done.  */
+
+	/* Handle the case where null bytes were found while aligning
+	   in preparation for the 64B loop.  */
+	.p2align 4
+L(found_aligning64B):
+	VBPERMQ(v1,v1,v10)
+#ifdef __LITTLE_ENDIAN__
+	MFVRD(r10,v1)
+	addi	r9,r10,-1	/* Form a mask from trailing zeros.  */
+	andc	r9,r9,r10
+	popcntd	r0,r9		/* Count the bits in the mask.  */
+#else
+	vsldoi	v1,v1,v1,6
+	MFVRD(r10,v1)
+	cntlzd	r0,r10		/* Count leading zeros before the match.  */
+#endif
+	addi	r5,r5,-16	/* Adjust address to the offset of the last
+				   16 bytes read.  */
+	/* Calculate the length as the offset of the last 16 bytes read
+	   minus the pointer to s, plus the bytes before the match.  */
+	subf	r5,r3,r5
+	add	r3,r5,r0
+	blr			/* Done.
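+
+	   Both endings above reduce to the same C-level step: the final
+	   length is the bytes scanned so far plus the count of zero bits
+	   before the first set bit of the match mask.  A hedged model,
+	   assuming a nonzero 16-bit mask from vbpermq with bit i set
+	   when byte i is a null:
+
+	       // Byte index of the first match within the quadword,
+	       // little-endian order (big-endian counts with vclzh).
+	       static inline int first_match (unsigned mask)
+	       {
+	         return __builtin_ctz (mask);   // assumes mask != 0
+	       }
+
+	   so final length = (block pointer - s) + first_match (mask).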
*/ + + /* Handle case of maxlen > 32 and found a null bytes within the first + 16 bytes of s. */ + .p2align 4 +L(early_find): + bpermd r5,r8,r10 /* r8 contains the bit permute constants. */ + bpermd r6,r8,r11 + sldi r5,r5,8 + or r5,r5,r6 /* r5 should hold a 16B mask of + a potential 0. */ + cntlzd r5,r5 /* Count leading zeros. */ + addi r3,r5,-48 /* Deduct the 48 leading zeros always + present. */ + blr /* Done. */ + + /* Handle case of maxlen <= 32. Use the POWER7 algorithm. */ + .p2align 4 +L(small_range): + clrrdi r8,r3,3 /* Align the pointer to 8B. */ + li r0,0 + /* Register's content at this point: + r3 == pointer to s, r4 == maxlen, r8 == pointer to s aligned to 8B, + r7 == last acceptable address. */ + cmpldi r4,0 /* Check if maxlen is zero. */ + beq L(end_max) /* If maxlen is zero. */ + + /* Calculate the last acceptable address and check for possible + addition overflow by using satured math: + r7 = r3 + r4 + r7 |= -(r7 < x) */ + add r7,r3,r4 + subfc r6,r3,r7 + subfe r9,r9,r9 + extsw r6,r9 + or r7,r7,r6 + addi r7,r7,-1 + + clrrdi r7,r7,3 /* Align to 8B address of last + acceptable address. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load aligned doubleword. */ + cmpb r10,r12,r0 /* Check for null bytes. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + sld r10,r10,r6 +#else + sld r10,r10,r6 + srd r10,r10,r6 +#endif /* __LITTLE_ENDIAN__ */ + cmpldi cr7,r10,0 + bne cr7,L(done_small) /* If found null byte. */ + + cmpld r8,r7 /* Check if reached maxlen. */ + beq L(end_max) /* If reached maxlen. */ + + /* Still handling case of maxlen <= 32. Read doubleword aligned until + find null bytes or reach maxlen. */ + .p2align 4 +L(loop_small): + ldu r12,8(r8) /* Load next doubleword and update r8. */ + cmpb r10,r12,r0 /* Check for null bytes. */ + cmpldi cr6,r10,0 + bne cr6,L(done_small) /* If found null bytes. */ + cmpld r8,r7 /* Check if reached maxlen. */ + bne L(loop_small) /* If it has more bytes to read. */ + mr r3,r4 /* Reached maxlen with null bytes not found. + Length is equal to maxlen. */ + blr /* Done. */ + + /* Still handling case of maxlen <= 32. Found null bytes. + Registers: r10 == match bits within doubleword, r8 == address of + last doubleword read, r3 == pointer to s, r4 == maxlen. */ + .p2align 4 +L(done_small): +#ifdef __LITTLE_ENDIAN__ + /* Count trailing zeros. */ + addi r0,r10,-1 + andc r0,r0,r10 + popcntd r0,r0 +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + sub r3,r8,r3 /* Calculate total of bytes before the match. */ + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r3,r0 /* Length until the match. */ + cmpld r3,r4 /* Check length is greater than maxlen. */ + blelr + mr r3,r4 /* If length is greater than maxlen, return + maxlen. */ + blr + + /* Handle case of reached maxlen with null bytes not found. */ + .p2align 4 +L(end_max): + mr r3,r4 /* Length is equal to maxlen. */ + blr /* Done. */ + + +END (__strnlen) +libc_hidden_def (__strnlen) +weak_alias (__strnlen, strnlen) +libc_hidden_def (strnlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strrchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strrchr.S new file mode 100644 index 0000000000..8eb74853c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strrchr.S @@ -0,0 +1,464 @@ +/* Optimized strrchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* char *[r3] strrchr (char *s [r3], int c [r4]) */ +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) +#define VCLZD(r,v) .long (0x100007c2 | ((r)<<(32-11)) | ((v)<<(32-21))) +#define VPOPCNTD(r,v) .long (0x100007c3 | ((r)<<(32-11)) | ((v)<<(32-21))) +#define VADDUQM(t,a,b) .long (0x10000100 \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) +#ifdef __LITTLE_ENDIAN__ +/* Find the match position from v6 and place result in r6. */ +# define CALCULATE_MATCH() \ + VBPERMQ(v6, v6, v10); \ + vsldoi v6, v6, v6, 6; \ + MFVRD(r7, v6); \ + cntlzd r6, r7; \ + subfic r6, r6, 15; +/* + * Find the first null position to mask bytes after null. + * (reg): vcmpequb result: v2 for 1st qw v3 for 2nd qw. + * Result placed at v2. + */ +# define FIND_NULL_POS(reg) \ + vspltisb v11, -1; \ + VADDUQM(v11, reg, v11); \ + vandc v11, v11, reg; \ + VPOPCNTD(v2, v11); \ + vspltb v11, v2, 15; \ + vcmpequb. v11, v11, v9; \ + blt cr6, 1f; \ + vsldoi v9, v0, v9, 1; \ + vslo v2, v2, v9; \ +1: \ + vsumsws v2, v2, v0; +#else +# define CALCULATE_MATCH() \ + VBPERMQ(v6, v6, v10); \ + MFVRD(r7, v6); \ + addi r6, r7, -1; \ + andc r6, r6, r7; \ + popcntd r6, r6; \ + subfic r6, r6, 15; +# define FIND_NULL_POS(reg) \ + VCLZD(v2, reg); \ + vspltb v11, v2, 7; \ + vcmpequb. v11, v11, v9; \ + blt cr6, 1f; \ + vsldoi v9, v0, v9, 1; \ + vsro v2, v2, v9; \ +1: \ + vsumsws v2, v2, v0; +#endif /* !__LITTLE_ENDIAN__ */ + .machine power7 +ENTRY (strrchr) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r9,0 /* Used to store last occurence. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* r4 is changed now. If it's passed more chars, then + check for null again. */ + cmpdi cr7,r4,0 + beq cr7,L(null_match) + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. 
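+
+	   A hedged C model of this discard step, where pad is the bit
+	   count of the unaligned prefix, 8 * (s % 8), and the helper
+	   name is hypothetical:
+
+	       // Clear cmpb match bits that fall before the true start
+	       // of the string within the aligned doubleword.
+	       static inline unsigned long
+	       drop_before_start (unsigned long mask, int pad)
+	       {
+	       #ifdef __LITTLE_ENDIAN__
+	         return (mask >> pad) << pad;   // low bytes come first
+	       #else
+	         return (mask << pad) >> pad;   // high bytes come first
+	       #endif
+	       }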
*/ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + +L(align): + andi. r12, r8, 15 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bne cr0, L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r7,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r7,r4 + cmpb r7,r7,r0 + or r12,r10,r11 + or r5,r6,r7 + or r5,r12,r5 + cmpdi cr7,r5,0 + beq cr7,L(vector) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + cmpdi cr6,r12,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 + + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ + +L(done): + /* If there are more than one 0xff in r11, find the first position of + 0xff in r11 and fill r10 with 0 from that position. */ + cmpdi cr7,r11,0 + beq cr7,L(no_null) +#ifdef __LITTLE_ENDIAN__ + addi r3,r11,-1 + andc r3,r3,r11 + popcntd r0,r3 +#else + cntlzd r0,r11 +#endif + subfic r0,r0,63 + li r6,-1 +#ifdef __LITTLE_ENDIAN__ + srd r0,r6,r0 +#else + sld r0,r6,r0 +#endif + and r10,r0,r10 +L(no_null): +#ifdef __LITTLE_ENDIAN__ + cntlzd r0,r10 /* Count leading zeros before c matches. */ + addi r3,r10,-1 + andc r3,r3,r10 + addi r10,r11,-1 + andc r10,r10,r11 + cmpld cr7,r3,r10 + bgt cr7,L(no_match) +#else + addi r3,r10,-1 /* Count trailing zeros before c matches. */ + andc r3,r3,r10 + popcntd r0,r3 + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + subfic r0,r0,7 + add r9,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + li r0,0 + cmpdi cr7,r11,0 /* If r11 == 0, no null's have been found. */ + beq cr7,L(align) + + .align 4 +L(no_match): + mr r3,r9 + blr + +/* Check the first 32B in GPR's and move to vectorized loop. */ + .p2align 5 +L(vector): + addi r3, r8, 8 + /* Make sure 32B aligned. */ + andi. r10, r3, 31 + bne cr0, L(loop) + vspltisb v0, 0 + /* Precompute vbpermq constant. */ + vspltisb v10, 3 + lvsl v11, r0, r0 + vslb v10, v11, v10 + MTVRD(v1, r4) + li r5, 16 + vspltb v1, v1, 7 + /* Compare 32 bytes in each loop. */ +L(continue): + lvx v4, 0, r3 + lvx v5, r3, r5 + vcmpequb v2, v0, v4 + vcmpequb v3, v0, v5 + vcmpequb v6, v1, v4 + vcmpequb v7, v1, v5 + vor v8, v2, v3 + vor v9, v6, v7 + vor v11, v8, v9 + vcmpequb. v11, v0, v11 + addi r3, r3, 32 + blt cr6, L(continue) + vcmpequb. 
v8, v0, v8 + blt cr6, L(match) + + /* One (or both) of the quadwords contains c/null. */ + vspltisb v8, 2 + vspltisb v9, 5 + /* Precompute values used for comparison. */ + vsl v9, v8, v9 /* v9 = 0x4040404040404040. */ + vaddubm v8, v9, v9 + vsldoi v8, v0, v8, 1 /* v8 = 0x80. */ + + /* Check if null is in second qw. */ + vcmpequb. v11, v0, v2 + blt cr6, L(secondqw) + + /* Null found in first qw. */ + addi r8, r3, -32 + /* Calculate the null position. */ + FIND_NULL_POS(v2) + /* Check if null is in the first byte. */ + vcmpequb. v11, v0, v2 + blt cr6, L(no_match) + vsububm v2, v8, v2 + /* Mask unwanted bytes after null. */ +#ifdef __LITTLE_ENDIAN__ + vslo v6, v6, v2 + vsro v6, v6, v2 +#else + vsro v6, v6, v2 + vslo v6, v6, v2 +#endif + vcmpequb. v11, v0, v6 + blt cr6, L(no_match) + /* Found a match before null. */ + CALCULATE_MATCH() + add r3, r8, r6 + blr + +L(secondqw): + addi r8, r3, -16 + FIND_NULL_POS(v3) + vcmpequb. v11, v0, v2 + blt cr6, L(no_match1) + vsububm v2, v8, v2 + /* Mask unwanted bytes after null. */ +#ifdef __LITTLE_ENDIAN__ + vslo v7, v7, v2 + vsro v7, v7, v2 +#else + vsro v7, v7, v2 + vslo v7, v7, v2 +#endif + vcmpequb. v11, v0, v7 + blt cr6, L(no_match1) + addi r8, r8, 16 + vor v6, v0, v7 +L(no_match1): + addi r8, r8, -16 + vcmpequb. v11, v0, v6 + blt cr6, L(no_match) + /* Found a match before null. */ + CALCULATE_MATCH() + add r3, r8, r6 + blr + +L(match): + /* One (or both) of the quadwords contains a match. */ + mr r8, r3 + vcmpequb. v8, v0, v7 + blt cr6, L(firstqw) + /* Match found in second qw. */ + addi r8, r8, 16 + vor v6, v0, v7 +L(firstqw): + addi r8, r8, -32 + CALCULATE_MATCH() + add r9, r8, r6 /* Compute final length. */ + b L(continue) +/* We are here because strrchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + andi. r12, r8, 15 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bne cr0, L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(vector1) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. 
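+
+	   For reference, the selection done throughout this function can
+	   be modeled in C.  A hedged little-endian sketch with
+	   hypothetical names:
+
+	       // match/null are cmpb results, 0xff per matching byte.
+	       // Keep only c matches strictly before the first null;
+	       // the highest remaining 0xff byte is the last occurrence.
+	       static inline unsigned long
+	       matches_before_null (unsigned long match, unsigned long null)
+	       {
+	         if (null == 0)
+	           return match;
+	         unsigned long first = null & -null;  // first null's bit
+	         return match & (first - 1);
+	       }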
*/ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +/* Check the first 32B in GPR's and move to vectorized loop. */ + .p2align 5 +L(vector1): + addi r3, r8, 8 + /* Make sure 32B aligned. */ + andi. r10, r3, 31 + bne cr0, L(loop_null) + vspltisb v0, 0 + /* Precompute vbpermq constant. */ + vspltisb v10, 3 + lvsl v11, r0, r0 + vslb v10, v11, v10 + li r5, 16 + /* Compare 32 bytes in each loop. */ +L(continue1): + lvx v4, 0, r3 + lvx v5, r3, r5 + vcmpequb v2, v0, v4 + vcmpequb v3, v0, v5 + vor v8, v2, v3 + vcmpequb. v11, v0, v8 + addi r3, r3, 32 + blt cr6, L(continue1) + addi r3, r3, -32 + VBPERMQ(v2, v2, v10) + VBPERMQ(v3, v3, v10) + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v3, v3, v3, 2 +#else + vsldoi v2, v2, v2, 6 + vsldoi v3, v3, v3, 4 +#endif + /* Merge the results and move to a GPR. */ + vor v4, v3, v2 + MFVRD(r5, v4) +#ifdef __LITTLE_ENDIAN__ + addi r6, r5, -1 + andc r6, r6, r5 + popcntd r6, r6 +#else + cntlzd r6, r5 /* Count leading zeros before the match. */ +#endif + add r3, r3, r6 /* Compute final length. */ + blr +END (strrchr) +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strrchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strspn.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strspn.S new file mode 100644 index 0000000000..e9271898f2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strspn.S @@ -0,0 +1,202 @@ +/* Optimized strspn implementation for Power8. + + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* size_t [r3] strspn (const char *string [r3], + const char *needleAccept [r4]) */ + +/* This takes a novel approach by computing a 256 bit mask whereby + each set bit implies the byte is "accepted". P8 vector hardware + has extremely efficient hardware for selecting bits from a mask. + + One might ask "why not use bpermd for short strings"? It is + so slow that its performance about matches the generic PPC64 + variant without any fancy masking, with the added expense of + making the mask. That was the first variant of this. 
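+
+   For comparison, the same 256-bit-mask idea in plain C, a hedged
+   sketch of the concept rather than this file's code:
+
+       #include <stddef.h>
+       #include <stdint.h>
+
+       size_t strspn_bitmap (const char *s, const char *accept)
+       {
+         uint64_t map[4] = { 0, 0, 0, 0 };  // one bit per byte value
+         for (; *accept != '\0'; accept++)
+           {
+             unsigned char c = (unsigned char) *accept;
+             map[c >> 6] |= 1ULL << (c & 63);
+           }
+         size_t n = 0;
+         while ((map[(unsigned char) s[n] >> 6]
+                 >> ((unsigned char) s[n] & 63)) & 1)
+           n++;
+         return n;                          // '\0' never matches
+       }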
*/ + + + +#include "sysdep.h" + +#ifndef USE_AS_STRCSPN +# define USE_AS_STRCSPN 0 +# ifndef STRSPN +# define STRSPN strspn +# endif +# define INITIAL_MASK 0 +# define UPDATE_MASK(RA, RS, RB) or RA, RS, RB +#else +# ifndef STRSPN +# define STRSPN strcspn +# endif +# define INITIAL_MASK -1 +# define UPDATE_MASK(RA, RS, RB) andc RA, RS, RB +#endif + +/* Simple macro to use VSX instructions in overlapping VR's. */ +#define XXVR(insn, vrt, vra, vrb) \ + insn 32+vrt, 32+vra, 32+vrb + +/* ISA 2.07B instructions are not all defined for older binutils. + Macros are defined below for these newer instructions in order + to maintain compatibility. */ + +/* Note, TX/SX is always set as VMX regs are the high 32 VSX regs. */ +#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) + +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + + /* This can be updated to power8 once the minimum version of + binutils supports power8 and the above instructions. */ + .machine power7 +EALIGN(STRSPN, 4, 0) + CALL_MCOUNT 2 + + /* Generate useful constants for later on. */ + vspltisb v1, 7 + vspltisb v2, -1 + vslb v1, v1, v1 /* 0x80 to swap high bit for vbpermq. */ + vspltisb v10, 0 + vsldoi v4, v10, v2, 2 /* 0xFFFF into vr4. */ + XXVR(xxmrgld, v4, v4, v10) /* Mask for checking matches. */ + + /* Prepare to compute 256b mask. */ + addi r4, r4, -1 + li r5, INITIAL_MASK + li r6, INITIAL_MASK + li r7, INITIAL_MASK + li r8, INITIAL_MASK + +#if USE_AS_STRCSPN + /* Ensure the null character never matches by clearing ISA bit 0 in + in r5 which is the bit which will check for it in the later usage + of vbpermq. */ + srdi r5, r5, 1 +#endif + + li r11, 1 + sldi r11, r11, 63 + + /* Start interleaved Mask computation. + This will eventually or 1's into ignored bits from vbpermq. */ + lvsr v11, 0, r3 + vspltb v11, v11, 0 /* Splat shift constant. */ + + /* Build a 256b mask in r5-r8. */ + .align 4 +L(next_needle): + lbzu r9, 1(r4) + + cmpldi cr0, r9, 0 + cmpldi cr1, r9, 128 + + /* This is a little tricky. srd only uses the first 7 bits, + and if bit 7 is set, value is always 0. So, we can + effectively shift 128b in this case. */ + xori r12, r9, 0x40 /* Invert bit 6. */ + srd r10, r11, r9 /* Mask for bits 0-63. */ + srd r12, r11, r12 /* Mask for bits 64-127. */ + + beq cr0, L(start_cmp) + + /* Now, or the value into the correct GPR. */ + bge cr1,L(needle_gt128) + UPDATE_MASK (r5, r5, r10) /* 0 - 63. */ + UPDATE_MASK (r6, r6, r12) /* 64 - 127. */ + b L(next_needle) + + .align 4 +L(needle_gt128): + UPDATE_MASK (r7, r7, r10) /* 128 - 191. */ + UPDATE_MASK (r8, r8, r12) /* 192 - 255. */ + b L(next_needle) + + + .align 4 +L(start_cmp): + /* Move and merge bitmap into 2 VRs. bpermd is slower on P8. */ + mr r0, r3 /* Save r3 for final length computation. */ + MTVRD (v5, r5) + MTVRD (v6, r6) + MTVRD (v7, r7) + MTVRD (v8, r8) + + /* Continue interleaved mask generation. */ +#ifdef __LITTLE_ENDIAN__ + vsrw v11, v2, v11 /* Note, shift ignores higher order bits. */ + vsplth v11, v11, 0 /* Only care about the high 16 bits of v10. */ +#else + vslw v11, v2, v11 /* Note, shift ignores higher order bits. */ + vsplth v11, v11, 1 /* Only care about the low 16 bits of v10. */ +#endif + lvx v0, 0, r3 /* Note, unaligned load ignores lower bits. */ + + /* Do the merging of the bitmask. */ + XXVR(xxmrghd, v5, v5, v6) + XXVR(xxmrghd, v6, v7, v8) + + /* Finish mask generation. 
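+	   Per 16B block, each input byte then selects one bit out of
+	   the 256-bit accept map: one vbpermq covers byte values 0-127
+	   and a second covers 128-255 after the high bit is flipped.
+	   In C terms (hedged model):
+
+	       // Nonzero when byte value C is in the accept set.
+	       static inline int accepted (const uint64_t map[4],
+	                                   unsigned int c)
+	       {
+	         return (map[c >> 6] >> (c & 63)) & 1;
+	       }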
*/ + vand v11, v11, v4 /* Throwaway bits not in the mask. */ + + /* Compare the first 1-16B, while masking unwanted bytes. */ + clrrdi r3, r3, 4 /* Note, counts from qw boundaries. */ + vxor v9, v0, v1 /* Swap high bit. */ + VBPERMQ (v8, v5, v0) + VBPERMQ (v7, v6, v9) + vor v7, v7, v8 + vor v7, v7, v11 /* Ignore non-participating bytes. */ + vcmpequh. v8, v7, v4 + bnl cr6, L(done) + + addi r3, r3, 16 + + .align 4 +L(vec): + lvx v0, 0, r3 + addi r3, r3, 16 + vxor v9, v0, v1 /* Swap high bit. */ + VBPERMQ (v8, v5, v0) + VBPERMQ (v7, v6, v9) + vor v7, v7, v8 + vcmpequh. v8, v7, v4 + blt cr6, L(vec) + + addi r3, r3, -16 +L(done): + subf r3, r0, r3 + MFVRD (r10, v7) + +#ifdef __LITTLE_ENDIAN__ + addi r0, r10, 1 /* Count the trailing 1's. */ + andc r10, r10, r0 + popcntd r10, r10 +#else + xori r10, r10, 0xffff /* Count leading 1's by inverting. */ + addi r3, r3, -48 /* Account for the extra leading zeros. */ + cntlzd r10, r10 +#endif + + add r3, r3, r10 + blr + +END(STRSPN) +libc_hidden_builtin_def (STRSPN) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/Implies new file mode 100644 index 0000000000..fad2505ab9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power8/fpu +powerpc/powerpc64/power8 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/Implies new file mode 100644 index 0000000000..ae0dbaf857 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies new file mode 100644 index 0000000000..f11e1bdba2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/multiarch/Implies new file mode 100644 index 0000000000..dd6bca4b36 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strcmp.S new file mode 100644 index 0000000000..2dc4f6c722 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strcmp.S @@ -0,0 +1,268 @@ +/* Optimized strcmp implementation for PowerPC64/POWER9. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ +#ifdef __LITTLE_ENDIAN__ +#include <sysdep.h> + +#ifndef STRCMP +# define STRCMP strcmp +#endif + +/* Implements the function + + int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) + + The implementation uses unaligned doubleword access for first 32 bytes + as in POWER8 patch and uses vectorised loops after that. */ + +/* TODO: Change this to actual instructions when minimum binutils is upgraded + to 2.27. Macros are defined below for these newer instructions in order + to maintain compatibility. */ +# define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) + +# define VEXTUBRX(t,a,b) .long (0x1000070d \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +# define VCMPNEZB(t,a,b) .long (0x10000507 \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +/* Get 16 bytes for unaligned case. + reg1: Vector to hold next 16 bytes. + reg2: Address to read from. + reg3: Permute control vector. */ +# define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vperm v8, v2, reg1, reg3; \ + vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, v9, reg1, reg3; + +/* TODO: change this to .machine power9 when the minimum required binutils + allows it. */ + + .machine power7 +EALIGN (STRCMP, 4, 0) + li r0, 0 + + /* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using + the code: + + (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) + + with PAGE_SIZE being 4096 and ITER_SIZE begin 16. */ + + rldicl r7, r3, 0, 52 + rldicl r9, r4, 0, 52 + cmpldi cr7, r7, 4096-16 + bgt cr7, L(pagecross_check) + cmpldi cr5, r9, 4096-16 + bgt cr5, L(pagecross_check) + + /* For short strings up to 16 bytes, load both s1 and s2 using + unaligned dwords and compare. */ + ld r8, 0(r3) + ld r10, 0(r4) + cmpb r12, r8, r0 + cmpb r11, r8, r10 + orc. r9, r12, r11 + bne cr0, L(different_nocmpb) + + ld r8, 8(r3) + ld r10, 8(r4) + cmpb r12, r8, r0 + cmpb r11, r8, r10 + orc. r9, r12, r11 + bne cr0, L(different_nocmpb) + + addi r7, r3, 16 + addi r4, r4, 16 + +L(align): + /* Now it has checked for first 16 bytes. */ + vspltisb v0, 0 + vspltisb v2, -1 + lvsr v6, 0, r4 /* Compute mask. */ + or r5, r4, r7 + andi. r5, r5, 0xF + beq cr0, L(aligned) + andi. r5, r7, 0xF + beq cr0, L(s1_align) + lvsr v10, 0, r7 /* Compute mask. */ + + /* Both s1 and s2 are unaligned. */ + GET16BYTES(v4, r7, v10) + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + beq cr6, L(match) + b L(different) + + /* Align s1 to qw and adjust s2 address. */ + .align 4 +L(match): + clrldi r6, r7, 60 + subfic r5, r6, 16 + add r7, r7, r5 + add r4, r4, r5 + andi. r5, r4, 0xF + beq cr0, L(aligned) + lvsr v6, 0, r4 + /* There are 2 loops depending on the input alignment. + Each loop gets 16 bytes from s1 and s2 and compares. + Loop until a mismatch or null occurs. 
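+
+	   The page guard that routes into this code can be written in C;
+	   a hedged sketch with PAGE_SIZE 4096 and ITER_SIZE 16:
+
+	       // True when a 16-byte read at P may cross into the
+	       // following page.
+	       static inline int crosses_page (const char *p)
+	       {
+	         return ((unsigned long) p % 4096) > 4096 - 16;
+	       }
+
+	   GET16BYTES only touches the second aligned quadword when the
+	   first one holds no terminating null, so the unaligned fetch
+	   cannot fault on a page the string never reaches.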
*/ +L(s1_align): + lvx v4, r7, r0 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, r7, r0 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, r7, r0 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, r7, r0 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + beq cr6, L(s1_align) + b L(different) + + .align 4 +L(aligned): + lvx v4, 0, r7 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, 0, r7 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, 0, r7 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, 0, r7 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + beq cr6, L(aligned) + + /* Calculate and return the difference. */ +L(different): + VCTZLSBB(r6, v7) + VEXTUBRX(r5, r6, v4) + VEXTUBRX(r4, r6, v5) + subf r3, r4, r5 + extsw r3, r3 + blr + + .align 4 +L(different_nocmpb): + neg r3, r9 + and r9, r9, r3 + cntlzd r9, r9 + subfic r9, r9, 63 + srd r3, r8, r9 + srd r10, r10, r9 + rldicl r10, r10, 0, 56 + rldicl r3, r3, 0, 56 + subf r3, r10, r3 + extsw r3, r3 + blr + + .align 4 +L(pagecross_check): + subfic r9, r9, 4096 + subfic r7, r7, 4096 + cmpld cr7, r7, r9 + bge cr7, L(pagecross) + mr r7, r9 + + /* If unaligned 16 bytes reads across a 4K page boundary, it uses + a simple byte a byte comparison until the page alignment for s1 + is reached. */ +L(pagecross): + add r7, r3, r7 + subf r9, r3, r7 + mtctr r9 + + .align 4 +L(pagecross_loop): + /* Loads a byte from s1 and s2, compare if *s1 is equal to *s2 + and if *s1 is '\0'. */ + lbz r9, 0(r3) + lbz r10, 0(r4) + addi r3, r3, 1 + addi r4, r4, 1 + cmplw cr7, r9, r10 + cmpdi cr5, r9, r0 + bne cr7, L(pagecross_ne) + beq cr5, L(pagecross_nullfound) + bdnz L(pagecross_loop) + b L(align) + + .align 4 +L(pagecross_ne): + extsw r3, r9 + mr r9, r10 +L(pagecross_retdiff): + subf r9, r9, r3 + extsw r3, r9 + blr + + .align 4 +L(pagecross_nullfound): + li r3, 0 + b L(pagecross_retdiff) +END (STRCMP) +libc_hidden_builtin_def (strcmp) +#else +#include <sysdeps/powerpc/powerpc64/power8/strcmp.S> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strncmp.S new file mode 100644 index 0000000000..c946a5c638 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strncmp.S @@ -0,0 +1,379 @@ +/* Optimized strncmp implementation for PowerPC64/POWER9. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ +#ifdef __LITTLE_ENDIAN__ +#include <sysdep.h> + +/* Implements the function + + int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n) + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment for first 32 bytes and uses + vectorised loops after that. */ + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +/* TODO: Change this to actual instructions when minimum binutils is upgraded + to 2.27. Macros are defined below for these newer instructions in order + to maintain compatibility. */ +# define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) + +# define VEXTUBRX(t,a,b) .long (0x1000070d \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +# define VCMPNEZB(t,a,b) .long (0x10000507 \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +/* Get 16 bytes for unaligned case. + reg1: Vector to hold next 16 bytes. + reg2: Address to read from. + reg3: Permute control vector. */ +# define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vperm v8, v2, reg1, reg3; \ + vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + cmplw cr6, r5, r11; \ + ble cr6, 2f; \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, v9, reg1, reg3; + +/* TODO: change this to .machine power9 when minimum binutils + is upgraded to 2.27. */ + .machine power7 +EALIGN (STRNCMP, 4, 0) + /* Check if size is 0. */ + cmpdi cr0, r5, 0 + beq cr0, L(ret0) + li r0, 0 + + /* Check if [s1]+32 or [s2]+32 will cross a 4K page boundary using + the code: + + (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) + + with PAGE_SIZE being 4096 and ITER_SIZE begin 32. */ + rldicl r8, r3, 0, 52 + cmpldi cr7, r8, 4096-32 + bgt cr7, L(pagecross) + rldicl r9, r4, 0, 52 + cmpldi cr7, r9, 4096-32 + bgt cr7, L(pagecross) + + /* For short strings up to 32 bytes, load both s1 and s2 using + unaligned dwords and compare. */ + + ld r7, 0(r3) + ld r9, 0(r4) + li r8, 0 + cmpb r8, r7, r8 + cmpb r6, r7, r9 + orc. r8, r8, r6 + bne cr0, L(different1) + + /* If the strings compared are equal, but size is less or equal + to 8, return 0. */ + cmpldi cr7, r5, 8 + li r9, 0 + ble cr7, L(ret1) + addi r5, r5, -8 + + ld r7, 8(r3) + ld r9, 8(r4) + cmpb r8, r7, r8 + cmpb r6, r7, r9 + orc. r8, r8, r6 + bne cr0, L(different1) + cmpldi cr7, r5, 8 + mr r9, r8 + ble cr7, L(ret1) + /* Update pointers and size. */ + addi r5, r5, -8 + addi r3, r3, 16 + addi r4, r4, 16 + + ld r7, 0(r3) + ld r9, 0(r4) + li r8, 0 + cmpb r8, r7, r8 + cmpb r6, r7, r9 + orc. r8, r8, r6 + bne cr0, L(different1) + cmpldi cr7, r5, 8 + li r9, 0 + ble cr7, L(ret1) + addi r5, r5, -8 + + ld r7, 8(r3) + ld r9, 8(r4) + cmpb r8, r7, r8 + cmpb r6, r7, r9 + orc. r8, r8, r6 + bne cr0, L(different1) + cmpldi cr7, r5, 8 + mr r9, r8 + ble cr7, L(ret1) + + /* Update pointers and size. */ + addi r5, r5, -8 + addi r3, r3, 16 + addi r4, r4, 16 +L(align): + /* Now it has checked for first 32 bytes, align source1 to doubleword + and adjust source2 address. */ + vspltisb v0, 0 + vspltisb v2, -1 + or r6, r4, r3 + andi. r6, r6, 0xF + beq cr0, L(aligned) + lvsr v6, 0, r4 /* Compute mask. */ + clrldi r6, r4, 60 + subfic r11, r6, 16 + andi. r6, r3, 0xF + beq cr0, L(s1_align) + /* Both s1 and s2 are unaligned. */ + GET16BYTES(v5, r4, v6) + lvsr v10, 0, r3 /* Compute mask. 
*/ + clrldi r6, r3, 60 + subfic r11, r6, 16 + GET16BYTES(v4, r3, v10) + VCMPNEZB(v7, v5, v4) + beq cr6, L(match) + b L(different) + + /* Align s1 to qw and adjust s2 address. */ + .align 4 +L(match): + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + subf r5, r11, r5 + add r3, r3, r11 + add r4, r4, r11 + andi. r11, r4, 0xF + beq cr0, L(aligned) + lvsr v6, 0, r4 + clrldi r6, r4, 60 + subfic r11, r6, 16 + /* There are 2 loops depending on the input alignment. + Each loop gets 16 bytes from s1 and s2, checks for null + and compares them. Loops until a mismatch or null occurs. */ +L(s1_align): + lvx v4, 0, r3 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + b L(s1_align) + .align 4 +L(aligned): + lvx v4, 0, r3 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + b L(aligned) + /* Calculate and return the difference. */ +L(different): + VCTZLSBB(r6, v7) + cmplw cr7, r5, r6 + ble cr7, L(ret0) + VEXTUBRX(r5, r6, v4) + VEXTUBRX(r4, r6, v5) + subf r3, r4, r5 + extsw r3, r3 + blr + + .align 4 +L(ret0): + li r9, 0 +L(ret1): + mr r3, r9 + blr + + /* The code now checks if r8 and r5 are different by issuing a + cmpb and shifts the result based on its output: + + leadzero = (__builtin_ffsl (z1) - 1); + leadzero = leadzero > (n-1)*8 ? (n-1)*8 : leadzero; + r1 = (r1 >> leadzero) & 0xFFUL; + r2 = (r2 >> leadzero) & 0xFFUL; + return r1 - r2; */ + + .align 4 +L(different1): + neg r11, r8 + sldi r5, r5, 3 + and r8, r11, r8 + addi r5, r5, -8 + cntlzd r8, r8 + subfic r8, r8, 63 + extsw r8, r8 + cmpld cr7, r8, r5 + ble cr7, L(different2) + mr r8, r5 +L(different2): + extsw r8, r8 + srd r7, r7, r8 + srd r9, r9, r8 + rldicl r3, r7, 0, 56 + rldicl r9, r9, 0, 56 + subf r9, r9, 3 + extsw r9, r9 + mr r3, r9 + blr + + /* If unaligned 16 bytes reads across a 4K page boundary, it uses + a simple byte a byte comparison until the page alignment for s1 + is reached. 
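+
+	   In C the fallback is roughly the following, a hedged sketch
+	   with hypothetical local names:
+
+	       // Compare byte by byte until s1 reaches the next page
+	       // boundary, stopping on mismatch, '\0', or n exhausted.
+	       while (n > 0 && to_boundary > 0)
+	         {
+	           unsigned char a = *s1++, b = *s2++;
+	           n--; to_boundary--;
+	           if (a != b)
+	             return a - b;
+	           if (a == 0)
+	             return 0;
+	         }
+	       // otherwise resume the vector path with the updated state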
*/ + .align 4 +L(pagecross): + lbz r7, 0(r3) + lbz r9, 0(r4) + subfic r8, r8,4095 + cmplw cr7, r9, r7 + bne cr7, L(byte_ne_3) + cmpdi cr7, r9, 0 + beq cr7, L(byte_ne_0) + addi r5, r5, -1 + subf r7, r8, r5 + subf r9, r7, r5 + addi r9, r9, 1 + mtctr r9 + b L(pagecross_loop1) + + .align 4 +L(pagecross_loop0): + beq cr7, L(ret0) + lbz r9, 0(r3) + lbz r8, 0(r4) + addi r5, r5, -1 + cmplw cr7, r9, r8 + cmpdi cr5, r9, 0 + bne cr7, L(byte_ne_2) + beq cr5, L(byte_ne_0) +L(pagecross_loop1): + cmpdi cr7, r5, 0 + addi r3, r3, 1 + addi r4, r4, 1 + bdnz L(pagecross_loop0) + cmpdi cr7, r7, 0 + li r9, 0 + bne+ cr7, L(align) + b L(ret1) + + .align 4 +L(byte_ne_0): + li r7, 0 +L(byte_ne_1): + subf r9, r9, r7 + extsw r9, r9 + b L(ret1) + + .align 4 +L(byte_ne_2): + extsw r7, r9 + mr r9, r8 + b L(byte_ne_1) +L(byte_ne_3): + extsw r7, r7 + b L(byte_ne_1) +END(STRNCMP) +libc_hidden_builtin_def(strncmp) +#else +#include <sysdeps/powerpc/powerpc64/power8/strncmp.S> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/ppc-mcount.S b/REORG.TODO/sysdeps/powerpc/powerpc64/ppc-mcount.S new file mode 100644 index 0000000000..8312f46644 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/ppc-mcount.S @@ -0,0 +1,39 @@ +/* PowerPC64-specific implementation of profiling support. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +/* We don't need to save the parameter-passing registers as gcc takes + care of that for us. Thus this function looks fairly normal. + In fact, the generic code would work for us. */ + +ENTRY(_mcount) + mflr r4 + ld r11, 0(r1) + stdu r1,-FRAME_MIN_SIZE(r1) + cfi_adjust_cfa_offset (FRAME_MIN_SIZE) + std r4, FRAME_MIN_SIZE+FRAME_LR_SAVE(r1) + cfi_offset (lr, FRAME_LR_SAVE) + ld r3, FRAME_LR_SAVE(r11) + bl JUMPTARGET(__mcount_internal) + nop + ld r0, FRAME_MIN_SIZE+FRAME_LR_SAVE(r1) + mtlr r0 + addi r1,r1,FRAME_MIN_SIZE + blr +END(_mcount) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/register-dump.h b/REORG.TODO/sysdeps/powerpc/powerpc64/register-dump.h new file mode 100644 index 0000000000..215e42b63f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/register-dump.h @@ -0,0 +1,124 @@ +/* Dump registers. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sys/uio.h> +#include <_itoa.h> + +/* This prints out the information in the following form: */ +static const char dumpform[] = "\ +Register dump:\n\ +sr0=000000000000020% sr1=000000000000021% dar=000000000000029% dsi=000002a%\n\ +lr=000000000000024% ctr=000000000000023% gr3*=000000000000022% trap=0000028%\n\ +ccr=0000026% xer=0000025%\n\ +gr0-3: 000000000000000% 000000000000001% 000000000000002% 000000000000003%\n\ +gr4-7: 000000000000004% 000000000000005% 000000000000006% 000000000000007%\n\ +gr8-11: 000000000000008% 000000000000009% 00000000000000a% 00000000000000b%\n\ +gr12-15: 00000000000000c% 00000000000000d% 00000000000000e% 00000000000000f%\n\ +gr16-19: 000000000000010% 000000000000011% 000000000000012% 000000000000013%\n\ +gr20-23: 000000000000014% 000000000000015% 000000000000016% 000000000000017%\n\ +gr24-27: 000000000000018% 000000000000019% 00000000000001a% 00000000000001b%\n\ +gr28-31: 00000000000001c% 00000000000001d% 00000000000001e% 00000000000001f%\n\ +fscr=000000000000050%\n\ +fp0-3: 000000000000030% 000000000000031% 000000000000032% 000000000000033%\n\ +fp4-7: 000000000000034% 000000000000035% 000000000000036% 000000000000037%\n\ +fp8-11: 000000000000038% 000000000000038% 00000000000003a% 00000000000003b%\n\ +fp12-15: 00000000000003c% 00000000000003d% 00000000000003e% 00000000000003f%\n\ +fp16-19: 000000000000040% 000000000000041% 000000000000042% 000000000000043%\n\ +fp20-23: 000000000000044% 000000000000045% 000000000000046% 000000000000047%\n\ +fp24-27: 000000000000048% 000000000000049% 00000000000004a% 00000000000004b%\n\ +fp28-31: 00000000000004c% 00000000000004d% 00000000000004e% 00000000000004f%\n\ +"; + +/* Most of the fields are self-explanatory. 'sr0' is the next + instruction to execute, from SRR0, which may have some relationship + with the instruction that caused the exception. 'r3*' is the value + that will be returned in register 3 when the current system call + returns. 'sr1' is SRR1, bits 16-31 of which are copied from the MSR: + + 16 - External interrupt enable + 17 - Privilege level (1=user, 0=supervisor) + 18 - FP available + 19 - Machine check enable (if clear, processor locks up on machine check) + 20 - FP exception mode bit 0 (FP exceptions recoverable) + 21 - Single-step trace enable + 22 - Branch trace enable + 23 - FP exception mode bit 1 + 25 - exception prefix (if set, exceptions are taken from 0xFFFnnnnn, + otherwise from 0x000nnnnn). + 26 - Instruction address translation enabled. + 27 - Data address translation enabled. + 30 - Exception is recoverable (otherwise, don't try to return). + 31 - Little-endian mode enable. + + 'Trap' is the address of the exception: + + 00200 - Machine check exception (memory parity error, for instance) + 00300 - Data access exception (memory not mapped, see dsisr for why) + 00400 - Instruction access exception (memory not mapped) + 00500 - External interrupt + 00600 - Alignment exception (see dsisr for more information) + 00700 - Program exception (illegal/trap instruction, FP exception) + 00800 - FP unavailable (should not be seen by user code) + 00900 - Decrementer exception (for instance, SIGALRM) + 00A00 - I/O controller interface exception + 00C00 - System call exception (for instance, kill(3)). + 00E00 - FP assist exception (optional FP instructions, etc.) + + 'dar' is the memory location, for traps 00300, 00400, 00600, 00A00. 
+ 'dsisr' has the following bits under trap 00300: + 0 - direct-store error exception + 1 - no page table entry for page + 4 - memory access not permitted + 5 - trying to access I/O controller space or using lwarx/stwcx on + non-write-cached memory + 6 - access was store + 9 - data access breakpoint hit + 10 - segment table search failed to find translation (64-bit ppcs only) + 11 - I/O controller instruction not permitted + For trap 00400, the same bits are set in SRR1 instead. + For trap 00600, bits 12-31 of the DSISR set to allow emulation of + the instruction without actually having to read it from memory. +*/ + +#define xtoi(x) (x >= 'a' ? x + 10 - 'a' : x - '0') + +static void +register_dump (int fd, struct sigcontext *ctx) +{ + char buffer[sizeof(dumpform)]; + char *bufferpos; + unsigned regno; + unsigned long *regs = (unsigned long *)(ctx->regs); + + memcpy(buffer, dumpform, sizeof(dumpform)); + + /* Generate the output. */ + while ((bufferpos = memchr (buffer, '%', sizeof(dumpform)))) + { + regno = xtoi (bufferpos[-1]) | xtoi (bufferpos[-2]) << 4; + memset (bufferpos-2, '0', 3); + _itoa_word (regs[regno], bufferpos+1, 16, 0); + } + + /* Write the output. */ + write (fd, buffer, sizeof(buffer) - 1); +} + + +#define REGISTER_DUMP \ + register_dump (fd, ctx) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/rtld-memset.c b/REORG.TODO/sysdeps/powerpc/powerpc64/rtld-memset.c new file mode 100644 index 0000000000..f3ed8ad1e7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/rtld-memset.c @@ -0,0 +1,4 @@ +/* PPCA2 has a different cache-line size than the usual 128 bytes. To avoid + using code that assumes cache-line size to be 128 bytes (with dcbz + instructions) we use the generic code instead. */ +#include <string/memset.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp-common.S new file mode 100644 index 0000000000..20f6cf364c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp-common.S @@ -0,0 +1,245 @@ +/* setjmp for PowerPC64. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#ifdef __NO_VMX__ +#include <novmxsetjmp.h> +#else +#include <jmpbuf-offsets.h> +#endif + +#ifndef __NO_VMX__ + .section ".toc","aw" +.LC__dl_hwcap: +# ifdef SHARED +# if IS_IN (rtld) + /* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ + .tc _rtld_local_ro[TC],_rtld_local_ro +# else + .tc _rtld_global_ro[TC],_rtld_global_ro +# endif +# else + .tc _dl_hwcap[TC],_dl_hwcap +# endif + .section ".text" +#endif + + .machine "altivec" +ENTRY (setjmp_symbol) + CALL_MCOUNT 1 + li r4,1 /* Set second argument to 1. 
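+				   In C terms this entry is a model of
+
+				     int setjmp (jmp_buf env)
+				       { return __sigsetjmp (env, 1); }
+
+				   while _setjmp below passes 0, i.e.
+
+				     int _setjmp (jmp_buf env)
+				       { return __sigsetjmp (env, 0); }
+
+				   (a hedged sketch; both are tail
+				   branches into the common entry).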
*/ + b JUMPTARGET (GLUE(__sigsetjmp_symbol,_ent)) +END (setjmp_symbol) + +#if defined SHARED && !IS_IN (rtld) && !defined __NO_VMX__ +/* When called from within libc we need a special version of _setjmp + that saves r2 since the call won't go via a plt call stub. See + bugz #269. __GI__setjmp is used in csu/libc-start.c when + HAVE_CLEANUP_JMP_BUF is defined. */ +ENTRY (__GI__setjmp) + std r2,FRAME_TOC_SAVE(r1) /* Save the callers TOC in the save area. */ + CALL_MCOUNT 1 + li r4,0 /* Set second argument to 0. */ + b JUMPTARGET (GLUE(__sigsetjmp_symbol,_ent)) +END (__GI__setjmp) +#endif + +ENTRY (_setjmp_symbol) + CALL_MCOUNT 1 + li r4,0 /* Set second argument to 0. */ + b JUMPTARGET (GLUE(__sigsetjmp_symbol,_ent)) +END (_setjmp_symbol) +libc_hidden_def (_setjmp_symbol) + +ENTRY (__sigsetjmp_symbol) + CALL_MCOUNT 2 +JUMPTARGET(GLUE(__sigsetjmp_symbol,_ent)): +#ifdef PTR_MANGLE + mr r5, r1 + PTR_MANGLE (r5, r6) + std r5,(JB_GPR1*8)(3) +#else + std r1,(JB_GPR1*8)(3) +#endif + mflr r0 +#if defined SHARED && !IS_IN (rtld) + ld r5,FRAME_TOC_SAVE(r1) /* Retrieve the callers TOC. */ + std r5,(JB_GPR2*8)(3) +#else + std r2,(JB_GPR2*8)(3) +#endif + /* setjmp probe expects longjmp first argument (8@3), second argument + (-4@4), and target address (8@0), respectively. */ + LIBC_PROBE (setjmp, 3, 8@3, -4@4, 8@0) + std r14,((JB_GPRS+0)*8)(3) + stfd fp14,((JB_FPRS+0)*8)(3) +#ifdef PTR_MANGLE + PTR_MANGLE2 (r0, r6) +#endif + std r0,(JB_LR*8)(3) + std r15,((JB_GPRS+1)*8)(3) + stfd fp15,((JB_FPRS+1)*8)(3) + mfcr r0 + std r16,((JB_GPRS+2)*8)(3) + stfd fp16,((JB_FPRS+2)*8)(3) + stw r0,((JB_CR*8)+4)(3) /* 32-bit CR. */ + std r17,((JB_GPRS+3)*8)(3) + stfd fp17,((JB_FPRS+3)*8)(3) + std r18,((JB_GPRS+4)*8)(3) + stfd fp18,((JB_FPRS+4)*8)(3) + std r19,((JB_GPRS+5)*8)(3) + stfd fp19,((JB_FPRS+5)*8)(3) + std r20,((JB_GPRS+6)*8)(3) + stfd fp20,((JB_FPRS+6)*8)(3) + std r21,((JB_GPRS+7)*8)(3) + stfd fp21,((JB_FPRS+7)*8)(3) + std r22,((JB_GPRS+8)*8)(3) + stfd fp22,((JB_FPRS+8)*8)(3) + std r23,((JB_GPRS+9)*8)(3) + stfd fp23,((JB_FPRS+9)*8)(3) + std r24,((JB_GPRS+10)*8)(3) + stfd fp24,((JB_FPRS+10)*8)(3) + std r25,((JB_GPRS+11)*8)(3) + stfd fp25,((JB_FPRS+11)*8)(3) + std r26,((JB_GPRS+12)*8)(3) + stfd fp26,((JB_FPRS+12)*8)(3) + std r27,((JB_GPRS+13)*8)(3) + stfd fp27,((JB_FPRS+13)*8)(3) + std r28,((JB_GPRS+14)*8)(3) + stfd fp28,((JB_FPRS+14)*8)(3) + std r29,((JB_GPRS+15)*8)(3) + stfd fp29,((JB_FPRS+15)*8)(3) + std r30,((JB_GPRS+16)*8)(3) + stfd fp30,((JB_FPRS+16)*8)(3) + std r31,((JB_GPRS+17)*8)(3) + stfd fp31,((JB_FPRS+17)*8)(3) +#ifndef __NO_VMX__ + ld r6,.LC__dl_hwcap@toc(r2) +# ifdef SHARED + /* Load _rtld-global._dl_hwcap. */ + ld r6,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r6) +# else + ld r6,0(r6) /* Load extern _dl_hwcap. */ +# endif + andis. r6,r6,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(no_vmx) + la r5,((JB_VRS)*8)(3) + andi. r6,r5,0xf + mfspr r0,VRSAVE + stw r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */ + addi r6,r5,16 + beq+ L(aligned_save_vmx) + + lvsr v0,0,r5 + lvsl v1,0,r5 + addi r6,r5,-16 + +# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \ + addi addgpr,addgpr,32; \ + vperm tmpvr,prevvr,savevr,shiftvr; \ + stvx tmpvr,0,savegpr + + /* + * We have to be careful not to corrupt the data below v20 and + * above v31. To keep things simple we just rotate both ends in + * the opposite direction to our main permute so we can use + * the common macro. 
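+ *
+ * A byte-level model of one step of this dance, hedged and with a
+ * hypothetical helper; byte indexing shown in big-endian style:
+ *
+ *     // Build the aligned quadword that save_misaligned_vmx stores:
+ *     // the last OFF bytes of PREV followed by the first 16-OFF
+ *     // bytes of CUR, for a destination misaligned by OFF bytes.
+ *     static void step (unsigned char out[16],
+ *                       const unsigned char prev[16],
+ *                       const unsigned char cur[16], int off)
+ *     {
+ *       for (int i = 0; i < 16; i++)
+ *         out[i] = i < off ? prev[16 - off + i] : cur[i - off];
+ *     }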
+
+	/* load and rotate data below v20 */
+	lvx	v2,0,r5
+	vperm	v2,v2,v2,v1
+	save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
+	save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
+	save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
+	save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
+	save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
+	save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
+	save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
+	save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
+	save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
+	save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
+	save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
+	save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
+	/* load and rotate data above v31 */
+	lvx	v2,0,r6
+	vperm	v2,v2,v2,v1
+	save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
+
+	b	L(no_vmx)
+
+L(aligned_save_vmx):
+	stvx	20,0,r5
+	addi	r5,r5,32
+	stvx	21,0,r6
+	addi	r6,r6,32
+	stvx	22,0,r5
+	addi	r5,r5,32
+	stvx	23,0,r6
+	addi	r6,r6,32
+	stvx	24,0,r5
+	addi	r5,r5,32
+	stvx	25,0,r6
+	addi	r6,r6,32
+	stvx	26,0,r5
+	addi	r5,r5,32
+	stvx	27,0,r6
+	addi	r6,r6,32
+	stvx	28,0,r5
+	addi	r5,r5,32
+	stvx	29,0,r6
+	addi	r6,r6,32
+	stvx	30,0,r5
+	stvx	31,0,r6
+L(no_vmx):
+#else
+	li	r6,0
+#endif
+#if IS_IN (rtld)
+	li	r3,0
+	blr
+#elif defined SHARED
+	b	JUMPTARGET (__sigjmp_save_symbol)
+#else
+	mflr	r0
+	std	r0,FRAME_LR_SAVE(r1)
+	stdu	r1,-FRAME_MIN_SIZE(r1)
+	cfi_adjust_cfa_offset(FRAME_MIN_SIZE)
+	cfi_offset(lr,FRAME_LR_SAVE)
+	bl	JUMPTARGET (__sigjmp_save_symbol)
+	nop
+	ld	r0,FRAME_MIN_SIZE+FRAME_LR_SAVE(r1)
+	addi	r1,r1,FRAME_MIN_SIZE
+	mtlr	r0
+	blr
+#endif
+END (__sigsetjmp_symbol)
+
+#if defined SHARED && !IS_IN (rtld) && !defined __NO_VMX__
+/* When called from within libc we need a special version of __sigsetjmp
+   that saves r2 since the call won't go via a plt call stub.  See
+   bugz #269.  */
+ENTRY (__GI___sigsetjmp)
+	std r2,FRAME_TOC_SAVE(r1)	/* Save the caller's TOC in the save area.  */
+	CALL_MCOUNT 1
+	b JUMPTARGET (GLUE(__sigsetjmp_symbol,_ent))
+END (__GI___sigsetjmp)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp.S
new file mode 100644
index 0000000000..3f61d28203
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp.S
@@ -0,0 +1,61 @@
+/* AltiVec (new) version of setjmp for PowerPC64.
+   Copyright (C) 1995-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libc-symbols.h>
+#include <rtld-global-offsets.h>
+#include <shlib-compat.h>
+
+#if !IS_IN (libc)
+/* Build a non-versioned object for rtld-*.  */
+# define setjmp_symbol setjmp
+# define _setjmp_symbol _setjmp
+# define __sigsetjmp_symbol __sigsetjmp
+# define __sigjmp_save_symbol __sigjmp_save
+# include "setjmp-common.S"
+
+#else /* IS_IN (libc) */
+/* Build a versioned object for libc.  */
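
The block that follows is glibc's symbol-versioning machinery: new links resolve setjmp/_setjmp/__sigsetjmp to the VMX-saving __vmx* entry points at version GLIBC_2.3.4, while binaries built against the old, smaller jmp_buf keep the __novmx* code at GLIBC_2.3. Outside glibc the same effect is available through the assembler's .symver directive; a hedged sketch for a hypothetical libdemo (it additionally needs a linker version script defining DEMO_1.0 and DEMO_2.0):

    /* foo() grew an incompatible ABI; keep the old entry point alive
       for already-linked binaries.  */
    int foo_old (int x) { return x; }
    int foo_new (int x) { return 2 * x; }

    __asm__ (".symver foo_old, foo@DEMO_1.0");    /* compat version   */
    __asm__ (".symver foo_new, foo@@DEMO_2.0");   /* default for new links */

The double '@' marks the default version, which is what versioned_symbol arranges below; the single '@' form corresponds to compat_symbol.
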
+versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4)
+versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
+# define setjmp_symbol __vmxsetjmp
+# define _setjmp_symbol __vmx_setjmp
+# define __sigsetjmp_symbol __vmx__sigsetjmp
+# define __sigjmp_save_symbol __vmx__sigjmp_save
+# include "setjmp-common.S"
+strong_alias (__vmxsetjmp, __vmx__setjmp)
+strong_alias (__vmx__sigsetjmp, __setjmp)
+
+# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_3, GLIBC_2_3_4)
+# undef setjmp_symbol
+# undef _setjmp_symbol
+# undef __sigsetjmp_symbol
+# undef __sigjmp_save_symbol
+# undef JB_SIZE
+# define __NO_VMX__
+compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_3)
+compat_symbol (libc, __novmx_setjmp, _setjmp, GLIBC_2_3)
+compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_3)
+# define setjmp_symbol __novmxsetjmp
+# define _setjmp_symbol __novmx_setjmp
+# define __sigsetjmp_symbol __novmx__sigsetjmp
+# define __sigjmp_save_symbol __novmx__sigjmp_save
+# include "setjmp-common.S"
+strong_alias (__novmxsetjmp, __novmx__setjmp)
+# endif
+#endif /* IS_IN (libc) */
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/stackguard-macros.h b/REORG.TODO/sysdeps/powerpc/powerpc64/stackguard-macros.h
new file mode 100644
index 0000000000..e80a683e64
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/stackguard-macros.h
@@ -0,0 +1,14 @@
+#include <stdint.h>
+
+#define STACK_CHK_GUARD \
+  ({ uintptr_t x; asm ("ld %0,-28688(13)" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+  ({												\
+     uintptr_t x;										\
+     asm ("ld %0,%1(13)"									\
+	  : "=r" (x)										\
+	  : "i" (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t))	\
+	 );											\
+     x;												\
+   })
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/start.S b/REORG.TODO/sysdeps/powerpc/powerpc64/start.S
new file mode 100644
index 0000000000..937c39a740
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/start.S
@@ -0,0 +1,92 @@
+/* Startup code for programs linked with GNU libc.  PowerPC64 version.
+   Copyright (C) 1998-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file with other
+   programs, and to distribute those programs without any restriction
+   coming from the use of this file.  (The GNU Lesser General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into another program.)
+
+   Note that people who make modified versions of this file are not
+   obligated to grant this special exception for their modified
+   versions; it is their choice whether to do so.  The GNU Lesser
+   General Public License gives permission to release a modified
+   version without this exception; this exception also makes it
+   possible to release a modified version which carries forward this
+   exception.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* We do not want .eh_frame info for crt1.o since crt1.o is linked
+   before crtbegin.o, the file defining __EH_FRAME_BEGIN__.  */
+#undef cfi_startproc
+#define cfi_startproc
+#undef cfi_endproc
+#define cfi_endproc
+
+ /* These are the various addresses we require.  */
+#ifdef PIC
+	.section ".data.rel.ro.local","aw"
+#else
+	.section ".rodata"
+#endif
+	.align	3
+L(start_addresses):
+	.quad	0 /* Was _SDA_BASE_, but that is not in the 64-bit ABI.  */
+/* These are function descriptors, so JUMPTARGET is not needed.  */
+	.quad	main
+	.quad	__libc_csu_init
+	.quad	__libc_csu_fini
+
+	ASM_SIZE_DIRECTIVE(L(start_addresses))
+
+	.section	".toc","aw"
+.L01:
+	.tc	L(start_addresses)[TC],L(start_addresses)
+	.section ".text"
+ENTRY(_start)
+ /* Save the stack pointer, in case we're statically linked under Linux.  */
+	mr	r9,r1
+ /* Set up an initial stack frame, and clear the LR.  */
+	clrrdi	r1,r1,4
+	li	r0,0
+	stdu	r1,-128(r1)
+	mtlr	r0
+	std	r0,0(r1)
+
+ /* Put the address of start_addresses in r8.  (The PPC64 ABI reserves
+    r13 for thread-local storage, so we leave it alone.)  */
+	ld	r8,.L01@toc(r2)
+
+ /* And continue in libc-start, in glibc.  */
+	b	JUMPTARGET(__libc_start_main)
+/* The linker needs this nop to recognize that it's OK to call via a
+   TOC adjusting stub.  */
+	nop
+
+END(_start)
+
+/* Define a symbol for the first piece of initialized data.  */
+	.section ".data"
+	.globl	__data_start
+__data_start:
+	.long	0
+weak_alias (__data_start, data_start)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S
new file mode 100644
index 0000000000..cbfcc14cfe
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S
@@ -0,0 +1,155 @@
+/* Optimized strchr implementation for PowerPC64.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* See strlen.s for comments on how this works.
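
Back-referencing the _start code just above: r9 carries the original stack pointer and r8 the address of the start_addresses block laid out by the .quad directives. A C-level model of that block (illustrative only; under ELFv1 the three code addresses are function descriptors rather than raw instruction addresses):

    struct start_addresses_model
    {
      long sda_base;                        /* slot 0: was _SDA_BASE_,
                                               always 0 in the 64-bit ABI */
      int (*main) (int, char **, char **);  /* descriptor for main */
      void (*init) (void);                  /* __libc_csu_init */
      void (*fini) (void);                  /* __libc_csu_fini */
    };

__libc_start_main then unpacks these to run the init/fini hooks around main.
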
*/ + +/* char * [r3] strchr (const char *s [r3] , int c [r4] ) */ + +#ifndef STRCHR +# define STRCHR strchr +#endif + +ENTRY (STRCHR) + CALL_MCOUNT 2 + +#define rTMP1 r0 +#define rRTN r3 /* outgoing result */ +#define rSTR r8 /* current word pointer */ +#define rCHR r4 /* byte we're looking for, spread over the whole word */ +#define rWORD r5 /* the current word */ +#define rCLZB rCHR /* leading zero byte count */ +#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rTMP2 r9 +#define rIGN r10 /* number of bits we should ignore in the first word */ +#define rMASK r11 /* mask with the bits to ignore set to 0 */ +#define rTMP3 r12 +#define rTMP4 rIGN +#define rTMP5 rMASK + + dcbt 0,rRTN + insrdi rCHR, rCHR, 8, 48 + li rMASK, -1 + insrdi rCHR, rCHR, 16, 32 + rlwinm rIGN, rRTN, 3, 26, 28 + insrdi rCHR, rCHR, 32, 0 + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + clrrdi rSTR, rRTN, 3 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + sldi rTMP1, rFEFE, 32 + insrdi r7F7F, r7F7F, 32, 0 + add rFEFE, rFEFE, rTMP1 +/* Test the first (partial?) word. */ + ld rWORD, 0(rSTR) +#ifdef __LITTLE_ENDIAN__ + sld rMASK, rMASK, rIGN +#else + srd rMASK, rMASK, rIGN +#endif + orc rWORD, rWORD, rMASK + add rTMP1, rFEFE, rWORD + nor rTMP2, r7F7F, rWORD + and. rTMP4, rTMP1, rTMP2 + xor rTMP3, rCHR, rWORD + orc rTMP3, rTMP3, rMASK + b L(loopentry) + +/* The loop. */ + +L(loop): + ldu rWORD, 8(rSTR) + and. rTMP5, rTMP1, rTMP2 +/* Test for 0. */ + add rTMP1, rFEFE, rWORD /* x - 0x01010101. */ + nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */ + bne L(foundit) + and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */ +/* Start test for the bytes we're looking for. */ + xor rTMP3, rCHR, rWORD +L(loopentry): + add rTMP1, rFEFE, rTMP3 + nor rTMP2, r7F7F, rTMP3 + beq L(loop) + +/* There is a zero byte in the word, but may also be a matching byte (either + before or after the zero byte). In fact, we may be looking for a + zero byte, in which case we return a match. */ + and. rTMP5, rTMP1, rTMP2 + li rRTN, 0 + beqlr +/* At this point: + rTMP5 bytes are 0x80 for each match of c, 0 otherwise. + rTMP4 bytes are 0x80 for each match of 0, 0 otherwise. + But there may be false matches in the next most significant byte from + a true match due to carries. This means we need to recalculate the + matches using a longer method for big-endian. */ +#ifdef __LITTLE_ENDIAN__ + addi rTMP1, rTMP5, -1 + andc rTMP1, rTMP1, rTMP5 + cntlzd rCLZB, rTMP1 + addi rTMP2, rTMP4, -1 + andc rTMP2, rTMP2, rTMP4 + cmpld rTMP1, rTMP2 + bgtlr + subfic rCLZB, rCLZB, 64-7 +#else +/* I think we could reduce this by two instructions by keeping the "nor" + results from the loop for reuse here. See strlen.S tail. Similarly + one instruction could be pruned from L(foundit). 
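
The doubleword tricks strchr builds on can be written in portable C. A sketch of the little-endian core with invented helper names (it uses unaligned loads where the assembly above deliberately stays on aligned doublewords so it can never fault past the terminator):

    #include <stdint.h>
    #include <string.h>

    static inline uint64_t zero_bytes (uint64_t x)  /* the "method (1)" test */
    {
      return (x + 0xfefefefefefefeffULL) & ~(x | 0x7f7f7f7f7f7f7f7fULL);
    }

    char *strchr_sketch (const char *s, int c)
    {
      uint64_t spread = 0x0101010101010101ULL * (unsigned char) c;
      for (size_t i = 0;; i += 8)
        {
          uint64_t w;
          memcpy (&w, s + i, 8);
          uint64_t hit = zero_bytes (w) | zero_bytes (w ^ spread);
          if (hit)
            {
              /* Lowest marker first; re-testing the byte directly
                 sidesteps the carry-induced false positives that the
                 comments above describe.  */
              size_t off = i + (__builtin_ctzll (hit) >> 3);
              return s[off] == (char) c ? (char *) s + off : NULL;
            }
        }
    }

XORing with the spread byte turns "find c" into "find zero", which is why one detector serves both tests; searching for '\0' itself also falls out correctly.
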
*/ + and rFEFE, r7F7F, rWORD + or rTMP5, r7F7F, rWORD + and rTMP1, r7F7F, rTMP3 + or rTMP4, r7F7F, rTMP3 + add rFEFE, rFEFE, r7F7F + add rTMP1, rTMP1, r7F7F + nor rWORD, rTMP5, rFEFE + nor rTMP2, rTMP4, rTMP1 + cntlzd rCLZB, rTMP2 + cmpld rWORD, rTMP2 + bgtlr +#endif + srdi rCLZB, rCLZB, 3 + add rRTN, rSTR, rCLZB + blr + +L(foundit): +#ifdef __LITTLE_ENDIAN__ + addi rTMP1, rTMP5, -1 + andc rTMP1, rTMP1, rTMP5 + cntlzd rCLZB, rTMP1 + subfic rCLZB, rCLZB, 64-7-64 + sradi rCLZB, rCLZB, 3 +#else + and rTMP1, r7F7F, rTMP3 + or rTMP4, r7F7F, rTMP3 + add rTMP1, rTMP1, r7F7F + nor rTMP2, rTMP4, rTMP1 + cntlzd rCLZB, rTMP2 + subi rSTR, rSTR, 8 + srdi rCLZB, rCLZB, 3 +#endif + add rRTN, rSTR, rCLZB + blr +END (STRCHR) + +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strcmp.S new file mode 100644 index 0000000000..ab5f8c231c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strcmp.S @@ -0,0 +1,180 @@ +/* Optimized strcmp implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */ + +#ifndef STRCMP +# define STRCMP strcmp +#endif + +EALIGN (STRCMP, 4, 0) + CALL_MCOUNT 2 + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rWORD1 r5 /* current word in s1 */ +#define rWORD2 r6 /* current word in s2 */ +#define rFEFE r7 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rBITDIF r10 /* bits that differ in s1 & s2 words */ +#define rTMP r11 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 + dcbt 0,rSTR2 + clrldi. rTMP, rTMP, 61 + lis rFEFE, -0x101 + bne L(unaligned) + + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) + lis r7F7F, 0x7f7f + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + sldi rTMP, rFEFE, 32 + insrdi r7F7F, r7F7F, 32, 0 + add rFEFE, rFEFE, rTMP + b L(g1) + +L(g0): ldu rWORD1, 8(rSTR1) + bne cr1, L(different) + ldu rWORD2, 8(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + and. rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + beq cr1, L(equal) + andc rTMP2, rTMP2, rTMP + rldimi rTMP2, rTMP2, 1, 0 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. 
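
The "mask off gunk" step above, restated in C: once the zero-byte markers for the word are known, everything past the first NUL is cleared from both words before comparing, so garbage beyond the terminator cannot make equal strings differ. A little-endian sketch with invented helper names (the keep-mask construction mirrors the addi/andc/rldimi triple):

    #include <stdint.h>

    /* m = zero-byte markers for w, 0x80 in each NUL byte.  If m is 0
       (no NUL in this word) the mask degenerates to all-ones.  */
    static inline uint64_t keep_through_nul (uint64_t w, uint64_t m)
    {
      uint64_t keep = (m - 1) & ~m;  /* ones strictly below first marker */
      keep |= keep << 1;             /* ...plus the marker bit itself */
      return w & keep;
    }

    static inline int cmp_sign (uint64_t a, uint64_t b)  /* masked words */
    {
      if (a == b)
        return 0;
      unsigned sh = __builtin_ctzll (a ^ b) & ~7u;  /* first differing byte */
      unsigned ba = (a >> sh) & 0xff, bb = (b >> sh) & 0xff;
      return ba < bb ? -1 : 1;
    }

The byte-wise comparison at the end also shows why the assembly special-cases the high bit: the result must reflect unsigned byte order even when a signed doubleword subtraction would overflow.
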
*/ + and rWORD1, rWORD1, rTMP2 + cmpd cr1, rWORD1, rWORD2 + beq cr1, L(equal) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzd rBITDIF, rBITDIF + cntlzd rNEG, rNEG + addi rNEG, rNEG, 7 + cmpd cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + blt- cr1, L(equal) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(unaligned): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + b L(u1) + +L(u0): lbzu rWORD1, 1(rSTR1) + bne- L(u4) + lbzu rWORD2, 1(rSTR2) +L(u1): cmpwi cr1, rWORD1, 0 + beq- cr1, L(u3) + cmpd rWORD1, rWORD2 + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + cmpdi cr1, rWORD1, 0 + cmpd rWORD1, rWORD2 + bne+ cr1, L(u0) +L(u3): sub rRTN, rWORD1, rWORD2 + blr +L(u4): lbz rWORD1, -1(rSTR1) + sub rRTN, rWORD1, rWORD2 + blr +END (STRCMP) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strlen.S new file mode 100644 index 0000000000..1466624c6a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strlen.S @@ -0,0 +1,203 @@ +/* Optimized strlen implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +/* The algorithm here uses the following techniques: + + 1) Given a word 'x', we can test to see if it contains any 0 bytes + by subtracting 0x01010101, and seeing if any of the high bits of each + byte changed from 0 to 1. This works because the least significant + 0 byte must have had no incoming carry (otherwise it's not the least + significant), so it is 0x00 - 0x01 == 0xff. For all other + byte values, either they have the high bit set initially, or when + 1 is subtracted you get a value in the range 0x00-0x7f, none of which + have their high bit set. The expression here is + (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when + there were no 0x00 bytes in the word. You get 0x80 in bytes that + match, but possibly false 0x80 matches in the next more significant + byte to a true match due to carries. For little-endian this is + of no consequence since the least significant match is the one + we're interested in, but big-endian needs method 2 to find which + byte matches. + + 2) Given a word 'x', we can test to see _which_ byte was zero by + calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f). + This produces 0x80 in each byte that was zero, and 0x00 in all + the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each + byte, and the '| x' part ensures that bytes with the high bit set + produce 0x00. The addition will carry into the high bit of each byte + iff that byte had one of its low 7 bits set. We can then just see + which was the most significant bit set and divide by 8 to find how + many to add to the index. + This is from the book 'The PowerPC Compiler Writer's Guide', + by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. + + We deal with strings not aligned to a word boundary by taking the + first word and ensuring that bytes not part of the string + are treated as nonzero. To allow for memory latency, we unroll the + loop a few times, being careful to ensure that we do not read ahead + across cache line boundaries. + + Questions to answer: + 1) How long are strings passed to strlen? If they're often really long, + we should probably use cache management instructions and/or unroll the + loop more. If they're often quite short, it might be better to use + fact (2) in the inner loop than have to recalculate it. + 2) How popular are bytes with the high bit set? If they are very rare, + on some processors it might be useful to use the simpler expression + ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one + ALU), but this fails when any character has its high bit set. + + Answer: + 1) Added a Data Cache Block Touch early to prefetch the first 128 + byte cache line. Adding dcbt instructions to the loop would not be + effective since most strings will be shorter than the cache line. */ + +/* Some notes on register usage: Under the SVR4 ABI, we can use registers + 0 and 3 through 12 (so long as we don't call any procedures) without + saving them. We can also use registers 14 through 31 if we save them. + We can't use r1 (it's the stack pointer), r2 nor r13 because the user + program may expect them to hold their usual value if we get sent + a signal. Integer parameters are passed in r3 through r10. + We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving + them, the others we must save. 
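
Both detection methods from the comment above, transcribed into standalone C so they can be experimented with directly (the two 64-bit constants correspond to the rFEFE and r7F7F registers):

    #include <stdint.h>
    #include <stdio.h>

    /* Method (1): cheap, but may set a spurious marker in the byte just
       above a true zero because of carries.  */
    static uint64_t method1 (uint64_t x)
    {
      return (x + 0xfefefefefefefeffULL) & ~(x | 0x7f7f7f7f7f7f7f7fULL);
    }

    /* Method (2): exact; 0x80 lands in precisely the zero bytes.  */
    static uint64_t method2 (uint64_t x)
    {
      const uint64_t k = 0x7f7f7f7f7f7f7f7fULL;
      return ~(((x & k) + k) | x | k);
    }

    int main (void)
    {
      uint64_t w = 0x0061620063646566ULL;  /* bytes 00 'a' 'b' 00 'c' 'd' 'e' 'f' */
      uint64_t m = method2 (w);
      printf ("m1=%016llx m2=%016llx\n",
              (unsigned long long) method1 (w), (unsigned long long) m);
      /* The first zero byte is the lowest marker on little-endian but the
         highest on big-endian, which is why the BE paths use cntlzd.  */
      printf ("LE index %d, BE index %d\n",
              __builtin_ctzll (m) >> 3, __builtin_clzll (m) >> 3);
      return 0;
    }

This is a sketch of the arithmetic only; the surrounding alignment, masking of the first partial doubleword, and loop unrolling are what the assembly spends most of its instructions on.
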
*/
+
+/* int [r3] strlen (char *s [r3])  */
+
+#ifndef STRLEN
+# define STRLEN strlen
+#endif
+
+ENTRY (STRLEN)
+	CALL_MCOUNT 1
+
+#define rTMP4	r0
+#define rRTN	r3	/* incoming STR arg, outgoing result */
+#define rSTR	r4	/* current string position */
+#define rPADN	r5	/* number of padding bits we prepend to the
+			   string to make it start at a word boundary */
+#define rFEFE	r6	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F	r7	/* constant 0x7f7f7f7f7f7f7f7f */
+#define rWORD1	r8	/* current string doubleword */
+#define rWORD2	r9	/* next string doubleword */
+#define rMASK	r9	/* mask for first string doubleword */
+#define rTMP1	r10
+#define rTMP2	r11
+#define rTMP3	r12
+
+	dcbt	0,rRTN
+	clrrdi	rSTR, rRTN, 3
+	lis	r7F7F, 0x7f7f
+	rlwinm	rPADN, rRTN, 3, 26, 28
+	ld	rWORD1, 0(rSTR)
+	addi	r7F7F, r7F7F, 0x7f7f
+	li	rMASK, -1
+	insrdi	r7F7F, r7F7F, 32, 0
+/* We use method (2) on the first two doublewords, because rFEFE isn't
+   required which reduces setup overhead.  Also gives a faster return
+   for small strings on big-endian due to needing to recalculate with
+   method (2) anyway.  */
+#ifdef __LITTLE_ENDIAN__
+	sld	rMASK, rMASK, rPADN
+#else
+	srd	rMASK, rMASK, rPADN
+#endif
+	and	rTMP1, r7F7F, rWORD1
+	or	rTMP2, r7F7F, rWORD1
+	lis	rFEFE, -0x101
+	add	rTMP1, rTMP1, r7F7F
+	addi	rFEFE, rFEFE, -0x101
+	nor	rTMP3, rTMP2, rTMP1
+	and.	rTMP3, rTMP3, rMASK
+	mtcrf	0x01, rRTN
+	bne	L(done0)
+	sldi	rTMP1, rFEFE, 32
+	add	rFEFE, rFEFE, rTMP1
+/* Are we now aligned to a doubleword boundary?  */
+	bt	28, L(loop)
+
+/* Handle second doubleword of pair.  */
+/* Perhaps use method (1) here for little-endian, saving one instruction?  */
+	ldu	rWORD1, 8(rSTR)
+	and	rTMP1, r7F7F, rWORD1
+	or	rTMP2, r7F7F, rWORD1
+	add	rTMP1, rTMP1, r7F7F
+	nor.	rTMP3, rTMP2, rTMP1
+	bne	L(done0)
+
+/* The loop.  */
+
+L(loop):
+	ld	rWORD1, 8(rSTR)
+	ldu	rWORD2, 16(rSTR)
+	add	rTMP1, rFEFE, rWORD1
+	nor	rTMP2, r7F7F, rWORD1
+	and.	rTMP1, rTMP1, rTMP2
+	add	rTMP3, rFEFE, rWORD2
+	nor	rTMP4, r7F7F, rWORD2
+	bne	L(done1)
+	and.	rTMP3, rTMP3, rTMP4
+	beq	L(loop)
+
+#ifndef __LITTLE_ENDIAN__
+	and	rTMP1, r7F7F, rWORD2
+	add	rTMP1, rTMP1, r7F7F
+	andc	rTMP3, rTMP4, rTMP1
+	b	L(done0)
+
+L(done1):
+	and	rTMP1, r7F7F, rWORD1
+	subi	rSTR, rSTR, 8
+	add	rTMP1, rTMP1, r7F7F
+	andc	rTMP3, rTMP2, rTMP1
+
+/* When we get to here, rSTR points to the first doubleword in the string that
+   contains a zero byte, and rTMP3 has 0x80 for bytes that are zero, and 0x00
+   otherwise.  */
+L(done0):
+	cntlzd	rTMP3, rTMP3
+	subf	rTMP1, rRTN, rSTR
+	srdi	rTMP3, rTMP3, 3
+	add	rRTN, rTMP1, rTMP3
+	blr
+#else
+
+L(done0):
+	addi	rTMP1, rTMP3, -1	/* Form a mask from trailing zeros.  */
+	andc	rTMP1, rTMP1, rTMP3
+	cntlzd	rTMP1, rTMP1		/* Count bits not in the mask.  */
+	subf	rTMP3, rRTN, rSTR
+	subfic	rTMP1, rTMP1, 64-7
+	srdi	rTMP1, rTMP1, 3
+	add	rRTN, rTMP1, rTMP3
+	blr
+
+L(done1):
+	addi	rTMP3, rTMP1, -1
+	andc	rTMP3, rTMP3, rTMP1
+	cntlzd	rTMP3, rTMP3
+	subf	rTMP1, rRTN, rSTR
+	subfic	rTMP3, rTMP3, 64-7-64
+	sradi	rTMP3, rTMP3, 3
+	add	rRTN, rTMP1, rTMP3
+	blr
+#endif
+
+END (STRLEN)
+libc_hidden_builtin_def (strlen)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strncmp.S
new file mode 100644
index 0000000000..076599804a
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strncmp.S
@@ -0,0 +1,210 @@
+/* Optimized strncmp implementation for PowerPC64.
+   Copyright (C) 2003-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* See strlen.s for comments on how the end-of-string testing works.  */
+
+/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+
+#ifndef STRNCMP
+# define STRNCMP strncmp
+#endif
+
+EALIGN (STRNCMP, 4, 0)
+	CALL_MCOUNT 3
+
+#define rTMP2	r0
+#define rRTN	r3
+#define rSTR1	r3	/* first string arg */
+#define rSTR2	r4	/* second string arg */
+#define rN	r5	/* max string length */
+#define rWORD1	r6	/* current word in s1 */
+#define rWORD2	r7	/* current word in s2 */
+#define rFEFE	r8	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F	r9	/* constant 0x7f7f7f7f7f7f7f7f */
+#define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
+#define rBITDIF	r11	/* bits that differ in s1 & s2 words */
+#define rTMP	r12
+
+	dcbt	0,rSTR1
+	or	rTMP, rSTR2, rSTR1
+	lis	r7F7F, 0x7f7f
+	dcbt	0,rSTR2
+	clrldi.	rTMP, rTMP, 61
+	cmpldi	cr1, rN, 0
+	lis	rFEFE, -0x101
+	bne	L(unaligned)
+/* We are doubleword aligned, so set up for two loops: first a doubleword
+   loop, then fall into the byte loop if there is any residual.  */
+	srdi.	rTMP, rN, 3
+	clrldi	rN, rN, 61
+	addi	rFEFE, rFEFE, -0x101
+	addi	r7F7F, r7F7F, 0x7f7f
+	cmpldi	cr1, rN, 0
+	beq	L(unaligned)
+
+	mtctr	rTMP	/* Power4 wants mtctr 1st in dispatch group.  */
+	ld	rWORD1, 0(rSTR1)
+	ld	rWORD2, 0(rSTR2)
+	sldi	rTMP, rFEFE, 32
+	insrdi	r7F7F, r7F7F, 32, 0
+	add	rFEFE, rFEFE, rTMP
+	b	L(g1)
+
+L(g0):
+	ldu	rWORD1, 8(rSTR1)
+	bne-	cr1, L(different)
+	ldu	rWORD2, 8(rSTR2)
+L(g1):	add	rTMP, rFEFE, rWORD1
+	nor	rNEG, r7F7F, rWORD1
+	bdz	L(tail)
+	and.	rTMP, rTMP, rNEG
+	cmpd	cr1, rWORD1, rWORD2
+	beq+	L(g0)
+
+/* OK.  We've hit the end of the string.  We need to be careful that
+   we don't compare two strings as different because of gunk beyond
+   the end of the strings...  */
+
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+	addi	rTMP2, rTMP, -1
+	beq	cr1, L(equal)
+	andc	rTMP2, rTMP2, rTMP
+	rldimi	rTMP2, rTMP2, 1, 0
+	and	rWORD2, rWORD2, rTMP2	/* Mask off gunk.  */
+	and	rWORD1, rWORD1, rTMP2
+	cmpd	cr1, rWORD1, rWORD2
+	beq	cr1, L(equal)
+	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
+	neg	rNEG, rBITDIF
+	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
+	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
+	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs.  */
+	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
+	sld	rWORD2, rWORD2, rNEG
+	xor.	rBITDIF, rWORD1, rWORD2
+	sub	rRTN, rWORD1, rWORD2
+	blt-	L(highbit)
+	sradi	rRTN, rRTN, 63		/* must return an int.  */
+	ori	rRTN, rRTN, 1
+	blr
+L(equal):
+	li	rRTN, 0
+	blr
+
+L(different):
+	ld	rWORD1, -8(rSTR1)
+	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
+	neg	rNEG, rBITDIF
+	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
+	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
+	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs.  */
+	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
+	sld	rWORD2, rWORD2, rNEG
+	xor.	rBITDIF, rWORD1, rWORD2
+	sub	rRTN, rWORD1, rWORD2
+	blt-	L(highbit)
+	sradi	rRTN, rRTN, 63
+	ori	rRTN, rRTN, 1
+	blr
+L(highbit):
+	sradi	rRTN, rWORD2, 63
+	ori	rRTN, rRTN, 1
+	blr
+
+#else
+L(endstring):
+	and	rTMP, r7F7F, rWORD1
+	beq	cr1, L(equal)
+	add	rTMP, rTMP, r7F7F
+	xor.	rBITDIF, rWORD1, rWORD2
+	andc	rNEG, rNEG, rTMP
+	blt-	L(highbit)
+	cntlzd	rBITDIF, rBITDIF
+	cntlzd	rNEG, rNEG
+	addi	rNEG, rNEG, 7
+	cmpd	cr1, rNEG, rBITDIF
+	sub	rRTN, rWORD1, rWORD2
+	blt-	cr1, L(equal)
+	sradi	rRTN, rRTN, 63		/* must return an int.  */
+	ori	rRTN, rRTN, 1
+	blr
+L(equal):
+	li	rRTN, 0
+	blr
+
+L(different):
+	ld	rWORD1, -8(rSTR1)
+	xor.	rBITDIF, rWORD1, rWORD2
+	sub	rRTN, rWORD1, rWORD2
+	blt-	L(highbit)
+	sradi	rRTN, rRTN, 63
+	ori	rRTN, rRTN, 1
+	blr
+L(highbit):
+	sradi	rRTN, rWORD2, 63
+	ori	rRTN, rRTN, 1
+	blr
+#endif
+
+/* Oh well.  In this case, we just do a byte-by-byte comparison.  */
+	.align 4
+L(tail):
+	and.	rTMP, rTMP, rNEG
+	cmpd	cr1, rWORD1, rWORD2
+	bne-	L(endstring)
+	addi	rSTR1, rSTR1, 8
+	bne-	cr1, L(different)
+	addi	rSTR2, rSTR2, 8
+	cmpldi	cr1, rN, 0
+L(unaligned):
+	mtctr	rN	/* Power4 wants mtctr 1st in dispatch group  */
+	bgt	cr1, L(uz)
+L(ux):
+	li	rRTN, 0
+	blr
+	.align 4
+L(uz):
+	lbz	rWORD1, 0(rSTR1)
+	lbz	rWORD2, 0(rSTR2)
+	nop
+	b	L(u1)
+L(u0):
+	lbzu	rWORD2, 1(rSTR2)
+L(u1):
+	bdz	L(u3)
+	cmpdi	cr1, rWORD1, 0
+	cmpd	rWORD1, rWORD2
+	beq-	cr1, L(u3)
+	lbzu	rWORD1, 1(rSTR1)
+	bne-	L(u2)
+	lbzu	rWORD2, 1(rSTR2)
+	bdz	L(u3)
+	cmpdi	cr1, rWORD1, 0
+	cmpd	rWORD1, rWORD2
+	bne-	L(u3)
+	lbzu	rWORD1, 1(rSTR1)
+	bne+	cr1, L(u0)
+
+L(u2):	lbzu	rWORD1, -1(rSTR1)
+L(u3):	sub	rRTN, rWORD1, rWORD2
+	blr
+END (STRNCMP)
+libc_hidden_builtin_def (strncmp)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/submul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc64/submul_1.S
new file mode 100644
index 0000000000..df93b4c3f4
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/submul_1.S
@@ -0,0 +1,21 @@
+/* PowerPC64 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+   the result from a second limb vector.
+   Copyright (C) 2013-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define USE_AS_SUBMUL
+#include "addmul_1.S"
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/sysdep.h b/REORG.TODO/sysdeps/powerpc/powerpc64/sysdep.h
new file mode 100644
index 0000000000..db7c1d78b5
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/sysdep.h
@@ -0,0 +1,425 @@
+/* Assembly macros for 64-bit PowerPC.
+   Copyright (C) 2002-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
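
For reference, what the submul_1.S just added computes: it reuses addmul_1.S with USE_AS_SUBMUL defined so that the per-limb add becomes a subtract. A plain-C model with GMP-style semantics (a sketch under those assumptions, not glibc's code; it multiplies n limbs by a single limb, subtracts the product from the destination, and returns the outgoing borrow):

    #include <stdint.h>

    typedef uint64_t mp_limb_t;

    mp_limb_t
    mpn_submul_1_ref (mp_limb_t *rp, const mp_limb_t *up,
                      long n, mp_limb_t vl)
    {
      mp_limb_t borrow = 0;
      for (long i = 0; i < n; i++)
        {
          unsigned __int128 prod = (unsigned __int128) up[i] * vl;
          mp_limb_t plo = (mp_limb_t) prod;
          mp_limb_t phi = (mp_limb_t) (prod >> 64);
          mp_limb_t r  = rp[i] - plo;
          mp_limb_t b1 = r > rp[i];        /* borrow from the subtraction */
          mp_limb_t r2 = r - borrow;
          mp_limb_t b2 = r2 > r;           /* borrow from the carry-in */
          rp[i] = r2;
          borrow = phi + b1 + b2;          /* cannot overflow a limb */
        }
      return borrow;
    }

The assembly version earns its keep by keeping the product, subtraction, and borrow chain in flight across loop iterations rather than serializing them as this loop does.
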
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/sysdep.h> + +#ifdef __ASSEMBLER__ + +/* Stack frame offsets. */ +#if _CALL_ELF != 2 +#define FRAME_MIN_SIZE 112 +#define FRAME_MIN_SIZE_PARM 112 +#define FRAME_BACKCHAIN 0 +#define FRAME_CR_SAVE 8 +#define FRAME_LR_SAVE 16 +#define FRAME_TOC_SAVE 40 +#define FRAME_PARM_SAVE 48 +#define FRAME_PARM1_SAVE 48 +#define FRAME_PARM2_SAVE 56 +#define FRAME_PARM3_SAVE 64 +#define FRAME_PARM4_SAVE 72 +#define FRAME_PARM5_SAVE 80 +#define FRAME_PARM6_SAVE 88 +#define FRAME_PARM7_SAVE 96 +#define FRAME_PARM8_SAVE 104 +#define FRAME_PARM9_SAVE 112 +#else +#define FRAME_MIN_SIZE 32 +#define FRAME_MIN_SIZE_PARM 96 +#define FRAME_BACKCHAIN 0 +#define FRAME_CR_SAVE 8 +#define FRAME_LR_SAVE 16 +#define FRAME_TOC_SAVE 24 +#define FRAME_PARM_SAVE 32 +#define FRAME_PARM1_SAVE 32 +#define FRAME_PARM2_SAVE 40 +#define FRAME_PARM3_SAVE 48 +#define FRAME_PARM4_SAVE 56 +#define FRAME_PARM5_SAVE 64 +#define FRAME_PARM6_SAVE 72 +#define FRAME_PARM7_SAVE 80 +#define FRAME_PARM8_SAVE 88 +#define FRAME_PARM9_SAVE 96 +#endif + +/* Support macros for CALL_MCOUNT. */ +#if _CALL_ELF == 2 +#define call_mcount_parm_offset (-64) +#else +#define call_mcount_parm_offset FRAME_PARM_SAVE +#endif + .macro SAVE_ARG NARG + .if \NARG + SAVE_ARG \NARG-1 + std 2+\NARG,call_mcount_parm_offset-8+8*(\NARG)(1) + .endif + .endm + + .macro REST_ARG NARG + .if \NARG + REST_ARG \NARG-1 + ld 2+\NARG,FRAME_MIN_SIZE_PARM+call_mcount_parm_offset-8+8*(\NARG)(1) + .endif + .endm + + .macro CFI_SAVE_ARG NARG + .if \NARG + CFI_SAVE_ARG \NARG-1 + cfi_offset(2+\NARG,call_mcount_parm_offset-8+8*(\NARG)) + .endif + .endm + + .macro CFI_REST_ARG NARG + .if \NARG + CFI_REST_ARG \NARG-1 + cfi_restore(2+\NARG) + .endif + .endm + +/* If compiled for profiling, call `_mcount' at the start of each function. + see ppc-mcount.S for more details. */ + .macro CALL_MCOUNT NARG +#ifdef PROF + mflr r0 + SAVE_ARG \NARG + std r0,FRAME_LR_SAVE(r1) + stdu r1,-FRAME_MIN_SIZE_PARM(r1) + cfi_adjust_cfa_offset(FRAME_MIN_SIZE_PARM) + cfi_offset(lr,FRAME_LR_SAVE) + CFI_SAVE_ARG \NARG + bl JUMPTARGET (_mcount) +#ifndef SHARED + nop +#endif + ld r0,FRAME_MIN_SIZE_PARM+FRAME_LR_SAVE(r1) + REST_ARG \NARG + mtlr r0 + addi r1,r1,FRAME_MIN_SIZE_PARM + cfi_adjust_cfa_offset(-FRAME_MIN_SIZE_PARM) + cfi_restore(lr) + CFI_REST_ARG \NARG +#endif + .endm + +#if _CALL_ELF != 2 + +/* Macro to prepare for calling via a function pointer. 
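
The FRAME_* tables above encode the two ABIs' stack-frame headers: ELFv1 uses a 48-byte header (with two reserved doublewords before the TOC save slot at offset 40) and a 112-byte minimum frame, while ELFv2 shrinks the header to 32 bytes with the TOC save at 24. A C model of the ELFv2 layout (illustrative; real frames are laid out by the compiler):

    #include <stdint.h>

    struct ppc64_elfv2_frame
    {
      void    *backchain;     /* FRAME_BACKCHAIN  (0)  */
      uint64_t cr_save;       /* FRAME_CR_SAVE    (8)  */
      void    *lr_save;       /* FRAME_LR_SAVE    (16) */
      void    *toc_save;      /* FRAME_TOC_SAVE   (24) */
      uint64_t parm_save[8];  /* FRAME_PARM_SAVE  (32) onward */
    };

With the eight-doubleword parameter save area included, sizeof equals 96, matching FRAME_MIN_SIZE_PARM; without it, 32, matching FRAME_MIN_SIZE, which is why CALL_MCOUNT above allocates FRAME_MIN_SIZE_PARM when it must preserve argument registers.
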
*/ + .macro PPC64_LOAD_FUNCPTR PTR + ld r12,0(\PTR) + ld r2,8(\PTR) + mtctr r12 + ld r11,16(\PTR) + .endm + +#ifdef USE_PPC64_OVERLAPPING_OPD +# define OPD_ENT(name) .quad BODY_LABEL (name), .TOC.@tocbase +#else +# define OPD_ENT(name) .quad BODY_LABEL (name), .TOC.@tocbase, 0 +#endif + +#define ENTRY_1(name) \ + .type BODY_LABEL(name),@function; \ + .globl name; \ + .section ".opd","aw"; \ + .align 3; \ +name##: OPD_ENT (name); \ + .previous; + +#define DOT_LABEL(X) X +#define BODY_LABEL(X) .LY##X +#define ENTRY_2(name) \ + .type name,@function; \ + ENTRY_1(name) +#define END_2(name) \ + .size name,.-BODY_LABEL(name); \ + .size BODY_LABEL(name),.-BODY_LABEL(name); +#define LOCALENTRY(name) + +#else /* _CALL_ELF */ + +/* Macro to prepare for calling via a function pointer. */ + .macro PPC64_LOAD_FUNCPTR PTR + mr r12,\PTR + mtctr r12 + .endm + +#define DOT_LABEL(X) X +#define BODY_LABEL(X) X +#define ENTRY_2(name) \ + .globl name; \ + .type name,@function; +#define END_2(name) \ + .size name,.-name; +#define LOCALENTRY(name) \ +1: addis r2,r12,.TOC.-1b@ha; \ + addi r2,r2,.TOC.-1b@l; \ + .localentry name,.-name; + +#endif /* _CALL_ELF */ + +#define ENTRY(name) \ + .section ".text"; \ + ENTRY_2(name) \ + .align ALIGNARG(2); \ +BODY_LABEL(name): \ + cfi_startproc; \ + LOCALENTRY(name) + +#define EALIGN_W_0 /* No words to insert. */ +#define EALIGN_W_1 nop +#define EALIGN_W_2 nop;nop +#define EALIGN_W_3 nop;nop;nop +#define EALIGN_W_4 EALIGN_W_3;nop +#define EALIGN_W_5 EALIGN_W_4;nop +#define EALIGN_W_6 EALIGN_W_5;nop +#define EALIGN_W_7 EALIGN_W_6;nop + +/* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes + past a 2^alignt boundary. */ +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(name) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ +BODY_LABEL(name): \ + cfi_startproc; \ + LOCALENTRY(name) + +/* Local labels stripped out by the linker. 
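
The two PPC64_LOAD_FUNCPTR variants above are the crux of the ELFv1/ELFv2 split: under ELFv1 a "function pointer" addresses a three-doubleword descriptor in .opd (exactly what OPD_ENT emits), so an indirect call must load the entry address, TOC pointer, and environment word; under ELFv2 the pointer is the entry address itself and the callee rebuilds its TOC from r12 via the LOCALENTRY sequence. The ELFv1 descriptor as a C struct (illustrative):

    #include <stdint.h>

    typedef struct
    {
      uint64_t entry;  /* first instruction; loaded into r12/CTR */
      uint64_t toc;    /* TOC base the callee expects in r2 */
      uint64_t env;    /* environment word, loaded into r11 */
    } elfv1_func_desc;

This is also why comparing function pointers across shared objects behaves differently on the two ABIs: ELFv1 compares descriptor addresses, ELFv2 compares code addresses.
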
*/
+#undef	L
+#define L(x)	.L##x
+
+#define tostring(s)	#s
+#define stringify(s)	tostring(s)
+#define XGLUE(a,b) a##b
+#define GLUE(a,b) XGLUE(a,b)
+#define LT_LABEL(name)	GLUE(.LT,name)
+#define LT_LABELSUFFIX(name,suffix) GLUE(GLUE(.LT,name),suffix)
+
+/* Support Traceback tables */
+#define TB_ASM			0x000c000000000000
+#define TB_GLOBALLINK	0x0000800000000000
+#define TB_IS_EPROL		0x0000400000000000
+#define TB_HAS_TBOFF	0x0000200000000000
+#define TB_INT_PROC		0x0000100000000000
+#define TB_HAS_CTL		0x0000080000000000
+#define TB_TOCLESS		0x0000040000000000
+#define TB_FP_PRESENT	0x0000020000000000
+#define TB_LOG_ABORT	0x0000010000000000
+#define TB_INT_HANDL	0x0000008000000000
+#define TB_NAME_PRESENT	0x0000004000000000
+#define TB_USES_ALLOCA	0x0000002000000000
+#define TB_SAVES_CR		0x0000000200000000
+#define TB_SAVES_LR		0x0000000100000000
+#define TB_STORES_BC	0x0000000080000000
+#define TB_FIXUP		0x0000000040000000
+#define TB_FP_SAVED(fprs)	(((fprs) & 0x3f) << 24)
+#define TB_GPR_SAVED(gprs)	(((gprs) & 0x3f) << 16)
+#define TB_FIXEDPARMS(parms)	(((parms) & 0xff) << 8)
+#define TB_FLOATPARMS(parms)	(((parms) & 0x7f) << 1)
+#define TB_PARMSONSTK	0x0000000000000001
+
+#define PPC_HIGHER(v) 	(((v) >> 32) & 0xffff)
+#define TB_DEFAULT	TB_ASM | TB_HAS_TBOFF | TB_NAME_PRESENT
+
+#define TRACEBACK(name) \
+LT_LABEL(name): ; \
+	.long 0 ; \
+	.quad TB_DEFAULT ; \
+	.long LT_LABEL(name)-BODY_LABEL(name) ; \
+	.short LT_LABELSUFFIX(name,_name_end)-LT_LABELSUFFIX(name,_name_start) ; \
+LT_LABELSUFFIX(name,_name_start): ;\
+	.ascii stringify(name) ; \
+LT_LABELSUFFIX(name,_name_end): ; \
+	.align 2 ;
+
+#define TRACEBACK_MASK(name,mask) \
+LT_LABEL(name): ; \
+	.long 0 ; \
+	.quad TB_DEFAULT | mask ; \
+	.long LT_LABEL(name)-BODY_LABEL(name) ; \
+	.short LT_LABELSUFFIX(name,_name_end)-LT_LABELSUFFIX(name,_name_start) ; \
+LT_LABELSUFFIX(name,_name_start): ;\
+	.ascii stringify(name) ; \
+LT_LABELSUFFIX(name,_name_end): ; \
+	.align 2 ;
+
+/* END generates Traceback tables */
+#undef	END
+#define END(name) \
+  cfi_endproc;			\
+  TRACEBACK(name)		\
+  END_2(name)
+
+/* This form supports more informative traceback tables */
+#define END_GEN_TB(name,mask)	\
+  cfi_endproc;			\
+  TRACEBACK_MASK(name,mask)	\
+  END_2(name)
+
+#if !IS_IN(rtld) && defined (ENABLE_LOCK_ELISION)
+# define ABORT_TRANSACTION \
+    cmpdi    13,0;		\
+    beq      1f;		\
+    lwz      0,TM_CAPABLE(13);	\
+    cmpwi    0,0;		\
+    beq	     1f;		\
+    li	     11,_ABORT_SYSCALL;	\
+    tabort.  11;		\
+    .align 4;			\
+1:
+#else
+# define ABORT_TRANSACTION
+#endif
+
+#define DO_CALL(syscall) \
+    ABORT_TRANSACTION \
+    li 0,syscall; \
+    sc
+
+/* ppc64 is always PIC */
+#undef JUMPTARGET
+#define JUMPTARGET(name) DOT_LABEL(name)
+
+#define PSEUDO(name, syscall_name, args) \
+  .section ".text";	\
+  ENTRY (name) \
+  DO_CALL (SYS_ify (syscall_name));
+
+#ifdef SHARED
+#define TAIL_CALL_SYSCALL_ERROR \
+    b JUMPTARGET(__syscall_error)
+#else
+/* Static version might be linked into a large app with a toc exceeding
+   64k.  We can't put a toc adjusting stub on a plain branch, so can't
+   tail call __syscall_error.
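
A quick C illustration of how the TB_* flags above compose: TRACEBACK_MASK ORs a caller-supplied mask into TB_DEFAULT and emits the result as the .quad of the traceback table following each function. Composing a plausible word for a routine that saves LR, CR, and r14..r31 (values copied from the defines above; the function choice is invented):

    #include <stdint.h>
    #include <stdio.h>

    #define TB_ASM            0x000c000000000000ULL
    #define TB_HAS_TBOFF      0x0000200000000000ULL
    #define TB_NAME_PRESENT   0x0000004000000000ULL
    #define TB_SAVES_CR       0x0000000200000000ULL
    #define TB_SAVES_LR       0x0000000100000000ULL
    #define TB_GPR_SAVED(g)   (((uint64_t) (g) & 0x3f) << 16)
    #define TB_DEFAULT        (TB_ASM | TB_HAS_TBOFF | TB_NAME_PRESENT)

    int main (void)
    {
      uint64_t tb = TB_DEFAULT | TB_SAVES_LR | TB_SAVES_CR
                    | TB_GPR_SAVED (18);   /* r14..r31 = 18 GPRs */
      printf ("%016llx\n", (unsigned long long) tb);
      return 0;
    }

Debuggers and unwinders on AIX-heritage systems walk these tables to find saved-register counts and the function name that .ascii embeds after the flag word.
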
*/ +#define TAIL_CALL_SYSCALL_ERROR \ + .ifdef .Local_syscall_error; \ + b .Local_syscall_error; \ + .else; \ +.Local_syscall_error: \ + mflr 0; \ + std 0,FRAME_LR_SAVE(1); \ + stdu 1,-FRAME_MIN_SIZE(1); \ + cfi_adjust_cfa_offset(FRAME_MIN_SIZE); \ + cfi_offset(lr,FRAME_LR_SAVE); \ + bl JUMPTARGET(__syscall_error); \ + nop; \ + ld 0,FRAME_MIN_SIZE+FRAME_LR_SAVE(1); \ + addi 1,1,FRAME_MIN_SIZE; \ + cfi_adjust_cfa_offset(-FRAME_MIN_SIZE); \ + mtlr 0; \ + cfi_restore(lr); \ + blr; \ + .endif +#endif + +#define PSEUDO_RET \ + bnslr+; \ + TAIL_CALL_SYSCALL_ERROR + +#define ret PSEUDO_RET + +#undef PSEUDO_END +#define PSEUDO_END(name) \ + END (name) + +#define PSEUDO_NOERRNO(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#define PSEUDO_RET_NOERRNO \ + blr + +#define ret_NOERRNO PSEUDO_RET_NOERRNO + +#undef PSEUDO_END_NOERRNO +#define PSEUDO_END_NOERRNO(name) \ + END (name) + +#define PSEUDO_ERRVAL(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#define PSEUDO_RET_ERRVAL \ + blr + +#define ret_ERRVAL PSEUDO_RET_ERRVAL + +#undef PSEUDO_END_ERRVAL +#define PSEUDO_END_ERRVAL(name) \ + END (name) + +#else /* !__ASSEMBLER__ */ + +#if _CALL_ELF != 2 + +#define PPC64_LOAD_FUNCPTR(ptr) \ + "ld 12,0(" #ptr ");\n" \ + "ld 2,8(" #ptr ");\n" \ + "mtctr 12;\n" \ + "ld 11,16(" #ptr ");" + +#ifdef USE_PPC64_OVERLAPPING_OPD +# define OPD_ENT(name) ".quad " BODY_PREFIX #name ", .TOC.@tocbase;" +#else +# define OPD_ENT(name) ".quad " BODY_PREFIX #name ", .TOC.@tocbase, 0;" +#endif + +#define ENTRY_1(name) \ + ".type " BODY_PREFIX #name ",@function;\n" \ + ".globl " #name ";\n" \ + ".pushsection \".opd\",\"aw\";\n" \ + ".align 3;\n" \ +#name ":\n" \ + OPD_ENT (name) "\n" \ + ".popsection;" + +#define DOT_PREFIX "" +#define BODY_PREFIX ".LY" +#define ENTRY_2(name) \ + ".type " #name ",@function;\n" \ + ENTRY_1(name) +#define END_2(name) \ + ".size " #name ",.-" BODY_PREFIX #name ";\n" \ + ".size " BODY_PREFIX #name ",.-" BODY_PREFIX #name ";" +#define LOCALENTRY(name) + +#else /* _CALL_ELF */ + +#define PPC64_LOAD_FUNCPTR(ptr) \ + "mr 12," #ptr ";\n" \ + "mtctr 12;" + +#define DOT_PREFIX "" +#define BODY_PREFIX "" +#define ENTRY_2(name) \ + ".type " #name ",@function;\n" \ + ".globl " #name ";" +#define END_2(name) \ + ".size " #name ",.-" #name ";" +#define LOCALENTRY(name) \ + "1: addis 2,12,.TOC.-1b@ha;\n" \ + "addi 2,2,.TOC.-1b@l;\n" \ + ".localentry " #name ",.-" #name ";" + +#endif /* _CALL_ELF */ + +#endif /* __ASSEMBLER__ */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/tls-macros.h b/REORG.TODO/sysdeps/powerpc/powerpc64/tls-macros.h new file mode 100644 index 0000000000..42a95ec5c1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/tls-macros.h @@ -0,0 +1,44 @@ +/* Include sysdeps/powerpc/tls-macros.h for __TLS_CALL_CLOBBERS */ +#include_next "tls-macros.h" + +/* PowerPC64 Local Exec TLS access. */ +#define TLS_LE(x) \ + ({ int * __result; \ + asm ("addis %0,13," #x "@tprel@ha\n\t" \ + "addi %0,%0," #x "@tprel@l" \ + : "=b" (__result) ); \ + __result; \ + }) +/* PowerPC64 Initial Exec TLS access. */ +#define TLS_IE(x) \ + ({ int * __result; \ + asm ("ld %0," #x "@got@tprel(2)\n\t" \ + "add %0,%0," #x "@tls" \ + : "=r" (__result) ); \ + __result; \ + }) + +#define __TLS_GET_ADDR "__tls_get_addr" + +/* PowerPC64 Local Dynamic TLS access. 
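
These macros hand-code the PowerPC64 TLS access models (the Local Dynamic macro continues right below). From C, GCC picks a model automatically and the tls_model attribute forces one, generating essentially the sequences shown; a sketch with invented variable names:

    /* Local Exec: the variable lives in this executable's own TLS block,
       addressed at a fixed tprel offset from r13.  */
    static __thread int counter
      __attribute__ ((tls_model ("local-exec")));

    /* Initial Exec: defined in a DSO loaded at startup; the tprel offset
       is fetched from the GOT, then added to r13.  */
    extern __thread int external_state
      __attribute__ ((tls_model ("initial-exec")));

    int bump (void) { return ++counter; }
    int peek (void) { return external_state; }

The dynamic models below pay for full generality (dlopen'd modules) with a call to __tls_get_addr, which is why the macros note r3 and the call-clobbered set.
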
*/
+#define TLS_LD(x)						      \
+  ({ int * __result;						      \
+     asm ("addi  3,2," #x "@got@tlsld\n\t"			      \
+	  "bl    " __TLS_GET_ADDR "\n\t"			      \
+	  "nop   \n\t"						      \
+	  "addis %0,3," #x "@dtprel@ha\n\t"			      \
+	  "addi  %0,%0," #x "@dtprel@l"				      \
+	  : "=b" (__result) :					      \
+	  : "3", __TLS_CALL_CLOBBERS);				      \
+     __result;							      \
+  })
+/* PowerPC64 General Dynamic TLS access.  */
+#define TLS_GD(x)						      \
+  ({ register int *__result __asm__ ("r3");			      \
+     asm ("addi  3,2," #x "@got@tlsgd\n\t"			      \
+	  "bl    " __TLS_GET_ADDR "\n\t"			      \
+	  "nop   "						      \
+	  : "=r" (__result) :					      \
+	  : __TLS_CALL_CLOBBERS);				      \
+     __result;							      \
+  })
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/tst-audit.h b/REORG.TODO/sysdeps/powerpc/powerpc64/tst-audit.h
new file mode 100644
index 0000000000..b25040b9f0
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/tst-audit.h
@@ -0,0 +1,33 @@
+/* Definitions for testing PLT entry/exit auditing.  PowerPC64 version.
+
+   Copyright (C) 2012-2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if _CALL_ELF != 2
+#define pltenter la_ppc64_gnu_pltenter
+#define pltexit la_ppc64_gnu_pltexit
+#define La_regs La_ppc64_regs
+#define La_retval La_ppc64_retval
+#define int_retval lrv_r3
+#else
+#define pltenter la_ppc64v2_gnu_pltenter
+#define pltexit la_ppc64v2_gnu_pltexit
+#define La_regs La_ppc64v2_regs
+#define La_retval La_ppc64v2_retval
+#define int_retval lrv_r3
+#endif
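
For context on tst-audit.h: its macros let glibc's generic PLT-audit tests plug into the ABI-specific auditor entry points (la_ppc64_gnu_pltenter under ELFv1, la_ppc64v2_gnu_pltenter under ELFv2). A minimal audit module of the shape such tests exercise, following the documented rtld-audit interface (a sketch: the generic prototype is shown, and a real module on this port must export it under the ABI-specific name; build it as a shared object and run a program with LD_AUDIT pointing at it):

    #define _GNU_SOURCE
    #include <link.h>
    #include <stdint.h>
    #include <stdio.h>

    unsigned int
    la_version (unsigned int version)
    {
      return LAV_CURRENT;       /* accept/advertise the audit ABI version */
    }

    uintptr_t
    pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
              uintptr_t *defcook, void *regs, unsigned int *flags,
              const char *symname, long int *framesizep)
    {
      fprintf (stderr, "PLT enter: %s\n", symname);
      return sym->st_value;     /* continue to the real target */
    }

The regs argument is really a pointer to the La_regs structure named above, giving the auditor access to the incoming argument registers at each intercepted PLT call.
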