1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
/* Optimized rawmemchr implementation for PowerPC64/POWER9.
Copyright (C) 2020-2024 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
/* Symbol name under which the routine below is emitted.  The #ifndef
   guard lets anything that defines RAWMEMCHR before this point choose a
   different name; otherwise the name defaults to __rawmemchr.  */
#ifndef RAWMEMCHR
# define RAWMEMCHR __rawmemchr
#endif
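/* Implements the function
void * [r3] rawmemchr (const void *s [r3], int c [r4])
The implementation can load bytes past a matching byte, but only
up to the next 16B boundary, so it never crosses a page.  The first
load is likewise rounded down to a 16B boundary, so it can read bytes
before s; those are shifted out and replaced by a non-matching fill
byte before the comparison. */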
.machine power9
ENTRY_TOCLESS (RAWMEMCHR, 4)
	CALL_MCOUNT 2

	/* Register roles for the whole routine:
	     r3   current scan pointer on entry s; the result on return
	     r4   c, the byte searched for (only its low byte is used)
	     r5   scratch
	     r9   number of bytes from s up to the next 16B boundary
	     r0   byte offset of the match inside the current vector
	     v18  c replicated into all 16 byte lanes
	     v19  (c ^ 0xff) replicated, i.e. a byte that never equals c
	     v0   16 bytes of data under test
	     v1   permute control for the unaligned head
	     v6   compare result: 0xff where the byte equals c, else 0x00  */

	/* Build the needle vector.  */
	mtvsrd	v18+32,r4
	vspltb	v18,v18,7

	/* Build the filler vector from c with its low 8 bits inverted.  */
	xori	r5,r4,0xff
	mtvsrd	v19+32,r5
	vspltb	v19,v19,7

	/* r9 = (-s) & 15.  */
	neg	r5,r3
	clrldi	r9,r5,60

	/* Head: fetch the 16B block containing s and shift it so that the
	   bytes not taken from s onward are filled from v19 and therefore
	   cannot produce a match.  */
	lvsr	v1,0,r3
	lvx	v0,0,r3
	vperm	v0,v19,v0,v1

	vcmpequb. v6,v0,v18
	/* A match in the head is at s + offset, which is exactly what the
	   offset-0 tail computes while r3 still holds s.  */
	bne	cr6,L(match_at_0)

	/* Nothing in the head: step r3 forward by r9 to the 16B boundary.  */
	add	r3,r3,r9

	/* Main loop: four 16B vectors (64 bytes) per iteration.  It has no
	   length bound; it only leaves through one of the match tails.  */
L(scan_64):
	lxv	v0+32,0(r3)
	vcmpequb. v6,v0,v18
	bne	cr6,L(match_at_0)

	lxv	v0+32,16(r3)
	vcmpequb. v6,v0,v18
	bne	cr6,L(match_at_16)

	lxv	v0+32,32(r3)
	vcmpequb. v6,v0,v18
	bne	cr6,L(match_at_32)

	lxv	v0+32,48(r3)
	vcmpequb. v6,v0,v18
	bne	cr6,L(match_at_48)

	addi	r3,r3,64
	b	L(scan_64)

	/* Tails: result = r3 + offset of the matching vector + offset of the
	   first matching byte inside v6.  */
L(match_at_0):
	vctzlsbb r0,v6
	add	r3,r3,r0
	blr

L(match_at_16):
	addi	r3,r3,16
	vctzlsbb r0,v6
	add	r3,r3,r0
	blr

L(match_at_32):
	addi	r3,r3,32
	vctzlsbb r0,v6
	add	r3,r3,r0
	blr

L(match_at_48):
	addi	r3,r3,48
	vctzlsbb r0,v6
	add	r3,r3,r0
	blr
END (RAWMEMCHR)
/* Publish the routine under its public name.  weak_alias is defined
   outside this file; presumably it makes rawmemchr a weak alias of
   __rawmemchr -- confirm against its definition.  */
weak_alias (__rawmemchr,rawmemchr)
/* NOTE(review): libc_hidden_builtin_def is also defined outside this
   file; presumably it supplies the library-internal hidden definition
   of __rawmemchr -- confirm.  Both lines name __rawmemchr directly
   rather than going through the RAWMEMCHR macro.  */
libc_hidden_builtin_def (__rawmemchr)
|