/* Optimized strcpy implementation for PowerPC64/POWER9.
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
/* Pick the symbol name this translation unit defines.
USE_AS_STPCPY selects the stpcpy flavour; in either flavour a STPCPY or
STRCPY macro supplied by the including file overrides the default
name. */
#if defined USE_AS_STPCPY && defined STPCPY
# define FUNC_NAME STPCPY
#elif defined USE_AS_STPCPY
# define FUNC_NAME __stpcpy
#elif defined STRCPY
# define FUNC_NAME STRCPY
#else
# define FUNC_NAME strcpy
#endif /* FUNC_NAME selection */
/* Implements the function
char * [r3] strcpy (char *dest [r3], const char *src [r4])
or
char * [r3] stpcpy (char *dest [r3], const char *src [r4])
if USE_AS_STPCPY is defined.
The implementation can load bytes past a null terminator, but only
up to the next 16B boundary, so it never crosses a page. */
/* Load quadword at addr+offset to vreg, check for null bytes,
and branch to label if any are found.
vreg is given as a vector register name; lxv takes a VSX register
number, hence the +32.
Requires v18 to hold all zero bytes. Overwrites v6 with the per-byte
compare result (the code at L(label) reads it to locate the null byte)
and overwrites cr6. */
#define CHECK16(vreg,offset,addr,label) \
lxv vreg+32,offset(addr); \
vcmpequb. v6,vreg,v18; \
bne cr6,L(label);
/* Register usage in the body below:
r3 dest on entry. Left untouched for strcpy; when USE_AS_STPCPY is
defined it is overwritten with the address of the copied null byte.
r4 src.
r5 source cursor: the first 16B-aligned address above src, then
advanced by 96 on every loop iteration.
r11 destination cursor: always as far from dest as r5 is from src.
r8 first the byte count from src to r5 (1..16), later the index of
the null byte inside the quadword that contains it.
r9, r10 length operands for stxvl (byte count in the top 8 bits).
v0-v5 source quadwords, v6 compare result, v18 all zero bytes,
v19 all 0xFF bytes. */
.machine power9
ENTRY_TOCLESS (FUNC_NAME, 4)
CALL_MCOUNT 2
vspltisb v18,0 /* Zeroes in v18 */
vspltisb v19,-1 /* 0xFF bytes in v19 */
/* Next 16B-aligned address. Prepare address for L(loop). */
addi r5,r4,16
clrrdi r5,r5,4 /* r5 = (src + 16) with the low 4 bits cleared */
subf r8,r4,r5 /* r8 = r5 - src, bytes up to the boundary */
add r11,r3,r8 /* r11 = dest + r8, dest address matching r5 */
/* Align data and fill bytes not loaded with non matching char.
lvx ignores the low four address bits, so the load stays inside the
aligned quadword that contains src. vperm then combines that
quadword with the 0xFF bytes of v19 under the control vector built by
lvsr; 0xFF can never compare equal to the zero bytes in v18. */
lvx v0,0,r4
lvsr v1,0,r4
vperm v0,v19,v0,v1
vcmpequb. v6,v0,v18 /* 0xff if byte is NULL, 0x00 otherwise */
beq cr6,L(no_null)
/* There's a null byte. The whole string lies before the first
boundary, so one partial store finishes the copy. */
vctzlsbb r8,v6 /* Number of trailing zeroes */
addi r9,r8,1 /* Add null byte. */
sldi r10,r9,56 /* stxvl wants size in top 8 bits. */
stxvl 32+v0,r3,r10 /* Partial store */
#ifdef USE_AS_STPCPY
/* stpcpy returns the dest address plus the size not counting the
final '\0'. */
add r3,r3,r8
#endif
blr
L(no_null):
/* No null byte before the boundary: store the r8 leading bytes to
dest, then fall through into the loop, which continues at r5 (source)
and r11 (destination). */
sldi r10,r8,56 /* stxvl wants size in top 8 bits */
stxvl 32+v0,r3,r10 /* Partial store */
.p2align 4
L(loop):
/* Load and test six quadwords (96 bytes) from r5. Nothing is stored
until all six are known to be free of null bytes; the first quadword
that contains one diverts to the matching tail. */
CHECK16(v0,0,r5,tail1)
CHECK16(v1,16,r5,tail2)
CHECK16(v2,32,r5,tail3)
CHECK16(v3,48,r5,tail4)
CHECK16(v4,64,r5,tail5)
CHECK16(v5,80,r5,tail6)
/* No null byte in these 96 bytes: store them all and advance both
cursors. */
stxv 32+v0,0(r11)
stxv 32+v1,16(r11)
stxv 32+v2,32(r11)
stxv 32+v3,48(r11)
stxv 32+v4,64(r11)
stxv 32+v5,80(r11)
addi r5,r5,96
addi r11,r11,96
b L(loop)
/* Tails. L(tailN) is entered from the Nth CHECK16 of the loop, so v6
holds the compare result for quadword N of the current group and the
quadwords before it (already loaded in v0..) contain no null byte.
Each tail stores those earlier quadwords whole, then stores quadword N
up to and including its null byte. */
.p2align 4
L(tail1):
vctzlsbb r8,v6 /* Number of trailing zeroes */
addi r9,r8,1 /* Add null terminator */
sldi r9,r9,56 /* stxvl wants size in top 8 bits */
stxvl 32+v0,r11,r9 /* Partial store */
#ifdef USE_AS_STPCPY
/* stpcpy returns the dest address plus the size not counting the
final '\0'. */
add r3,r11,r8
#endif
blr
.p2align 4
L(tail2):
stxv 32+v0,0(r11) /* Quadword 1 has no null byte: store it whole */
vctzlsbb r8,v6 /* Index of the null byte in v1 */
addi r9,r8,1 /* Include the null terminator */
sldi r9,r9,56 /* stxvl wants size in top 8 bits */
addi r11,r11,16 /* r11 = dest address of quadword 2 */
stxvl 32+v1,r11,r9 /* Partial store */
#ifdef USE_AS_STPCPY
add r3,r11,r8 /* Address of the copied null byte */
#endif
blr
.p2align 4
L(tail3):
/* Quadwords 1-2 have no null byte: store them whole. */
stxv 32+v0,0(r11)
stxv 32+v1,16(r11)
vctzlsbb r8,v6 /* Index of the null byte in v2 */
addi r9,r8,1 /* Include the null terminator */
sldi r9,r9,56 /* stxvl wants size in top 8 bits */
addi r11,r11,32 /* r11 = dest address of quadword 3 */
stxvl 32+v2,r11,r9 /* Partial store */
#ifdef USE_AS_STPCPY
add r3,r11,r8 /* Address of the copied null byte */
#endif
blr
.p2align 4
L(tail4):
/* Quadwords 1-3 have no null byte: store them whole. */
stxv 32+v0,0(r11)
stxv 32+v1,16(r11)
stxv 32+v2,32(r11)
vctzlsbb r8,v6 /* Index of the null byte in v3 */
addi r9,r8,1 /* Include the null terminator */
sldi r9,r9,56 /* stxvl wants size in top 8 bits */
addi r11,r11,48 /* r11 = dest address of quadword 4 */
stxvl 32+v3,r11,r9 /* Partial store */
#ifdef USE_AS_STPCPY
add r3,r11,r8 /* Address of the copied null byte */
#endif
blr
.p2align 4
L(tail5):
/* Quadwords 1-4 have no null byte: store them whole. */
stxv 32+v0,0(r11)
stxv 32+v1,16(r11)
stxv 32+v2,32(r11)
stxv 32+v3,48(r11)
vctzlsbb r8,v6 /* Index of the null byte in v4 */
addi r9,r8,1 /* Include the null terminator */
sldi r9,r9,56 /* stxvl wants size in top 8 bits */
addi r11,r11,64 /* r11 = dest address of quadword 5 */
stxvl 32+v4,r11,r9 /* Partial store */
#ifdef USE_AS_STPCPY
add r3,r11,r8 /* Address of the copied null byte */
#endif
blr
.p2align 4
L(tail6):
/* Quadwords 1-5 have no null byte: store them whole. */
stxv 32+v0,0(r11)
stxv 32+v1,16(r11)
stxv 32+v2,32(r11)
stxv 32+v3,48(r11)
stxv 32+v4,64(r11)
vctzlsbb r8,v6 /* Index of the null byte in v5 */
addi r9,r8,1 /* Include the null terminator */
sldi r9,r9,56 /* stxvl wants size in top 8 bits */
addi r11,r11,80 /* r11 = dest address of quadword 6 */
stxvl 32+v5,r11,r9 /* Partial store */
#ifdef USE_AS_STPCPY
add r3,r11,r8 /* Address of the copied null byte */
#endif
blr
END (FUNC_NAME)
/* Only the strcpy build emits the hidden builtin definition; it is
skipped when this file is assembled as stpcpy (USE_AS_STPCPY). */
#ifndef USE_AS_STPCPY
libc_hidden_builtin_def (strcpy)
#endif