/* Extracted from the glibc source tree:
   sysdeps/powerpc/powerpc64/le/power9/rawmemchr.S
   (blob 670107b36f00bdbe78ca16bddf4a9e34ee926385).  */
/* Optimized rawmemchr implementation for PowerPC64/POWER9.
   Copyright (C) 2020-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

#ifndef RAWMEMCHR
# define RAWMEMCHR __rawmemchr
#endif

/* Implements the function

   void * [r3] rawmemchr (const void *s [r3], int c [r4])

   Returns a pointer to the first byte equal to the low byte of c at or
   after s.  There is no length limit: the search loop only ends when a
   matching byte is found, so the caller must guarantee one exists.

   Memory is only read in 16B-aligned blocks, and a block is not read
   until every earlier block has been checked.  The implementation can
   therefore load bytes before s (down to the previous 16B boundary) and
   bytes past a matching byte (up to the next 16B boundary), so it never
   crosses a page.

   Register usage:
     r3   current pointer / return value
     r4   c
     r5   c ^ 0xff, whose low byte can never equal the low byte of c
     r9   distance from s to the first 16B boundary strictly above s
     r0   index of the first matching byte inside a 16B block
     v18  matching char replicated to all 16 bytes
     v19  non matching char replicated to all 16 bytes
     v0   data under test, v1 permute control, v6 compare result  */

.machine power9
ENTRY_TOCLESS (RAWMEMCHR, 4)
	CALL_MCOUNT 2

	xori	r5,r4,0xff	/* low byte now differs from c  */

	mtvsrd	v18+32,r4	/* matching char in v18  */
	mtvsrd	v19+32,r5	/* non matching char in v19  */

	vspltb	v18,v18,7	/* replicate  */
	vspltb	v19,v19,7	/* replicate  */

	/* r9 = 16 - (s & 15): how many bytes from s to the first 16B
	   boundary strictly above it.  This is 16, not 0, when s is already
	   aligned: the head check below then covers one whole block, so the
	   loop must start at the following block instead of re-scanning
	   the same 16 bytes.  */
	rldicl	r9,r3,0,60	/* r9 = s & 15  */
	subfic	r9,r9,16	/* r9 = 16 - r9, in the range 1..16  */

	/* Align data and fill bytes not loaded with non matching char.
	   lvx reads the 16B-aligned block containing s; the permute moves
	   s[0] to the first byte position and pads the positions that would
	   lie beyond the block with v19, so they cannot match.  */
	lvx	v0,0,r3
	lvsr	v1,0,r3
	vperm	v0,v19,v0,v1

	vcmpequb. v6,v0,v18	/* 0xff if byte matches, 0x00 otherwise  */
	beq	cr6,L(aligned)	/* CR6[EQ] set: no byte matched  */

	vctzlsbb r0,v6		/* r0 = index of first matching byte  */
	add	r3,r3,r0
	blr

L(aligned):
	add	r3,r3,r9	/* r3 = first unchecked 16B-aligned block  */

	/* Test 64B per iteration, 16B at a time.  Each block is loaded only
	   after the previous one has been found free of matches.  */
L(loop):
	lxv	v0+32,0(r3)
	vcmpequb. v6,v0,v18	/* 0xff if byte matches, 0x00 otherwise  */
	bne	cr6,L(tail1)

	lxv	v0+32,16(r3)
	vcmpequb. v6,v0,v18	/* 0xff if byte matches, 0x00 otherwise  */
	bne	cr6,L(tail2)

	lxv	v0+32,32(r3)
	vcmpequb. v6,v0,v18	/* 0xff if byte matches, 0x00 otherwise  */
	bne	cr6,L(tail3)

	lxv	v0+32,48(r3)
	vcmpequb. v6,v0,v18	/* 0xff if byte matches, 0x00 otherwise  */
	bne	cr6,L(tail4)

	addi	r3,r3,64
	b	L(loop)

	/* Match found in block N of the current 64B group (N = 1..4):
	   result = r3 + 16 * (N - 1) + index of the first matching byte.  */
L(tail1):
	vctzlsbb r0,v6
	add	r3,r3,r0
	blr

L(tail2):
	vctzlsbb r0,v6
	add	r3,r3,r0
	addi	r3,r3,16
	blr

L(tail3):
	vctzlsbb r0,v6
	add	r3,r3,r0
	addi	r3,r3,32
	blr

L(tail4):
	vctzlsbb r0,v6
	add	r3,r3,r0
	addi	r3,r3,48
	blr

END (RAWMEMCHR)
/* Symbol boilerplate.  Both macros are defined outside this file.
   NOTE(review): presumably weak_alias makes rawmemchr a weak alias of
   __rawmemchr and libc_hidden_builtin_def supplies the hidden
   definition used for calls from inside libc -- confirm against the
   macro definitions.  Both lines name __rawmemchr literally rather than
   through the RAWMEMCHR macro, so a build that overrides RAWMEMCHR
   presumably has to neutralise or redefine them as well -- confirm.  */
weak_alias (__rawmemchr,rawmemchr)
libc_hidden_builtin_def (__rawmemchr)