about summary refs log tree commit diff
path: root/sysdeps/ia64/memccpy.S
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2003-09-09 20:18:53 +0000
committerUlrich Drepper <drepper@redhat.com>2003-09-09 20:18:53 +0000
commite352f0d83289322fe9706ee45c449170653f85e8 (patch)
tree7715fce2fc7f8924d72931a7ef4febd5860669f7 /sysdeps/ia64/memccpy.S
parent2036b767dd60813df266647ab1e23159f5043706 (diff)
downloadglibc-e352f0d83289322fe9706ee45c449170653f85e8.tar.gz
glibc-e352f0d83289322fe9706ee45c449170653f85e8.tar.xz
glibc-e352f0d83289322fe9706ee45c449170653f85e8.zip
Update.
2003-09-09  Ulrich Drepper  <drepper@redhat.com>

	* string/stratcliff.c (main): Check memccpy.

2003-04-11  Jes Sorensen  <jes@wildopensource.com>

	* sysdeps/ia64/memccpy.S: When recovering for src_aligned and the
	character is found during recovery, use correct register when
	determining the position of the found character.

2003-04-01  Jes Sorensen  <jes@wildopensource.com>

	* sysdeps/ia64/memccpy.S: Use speculatively loads for readahead to
	avoid segfaults when reading from unmapped pages. For aligned
	reload and continue, for misaligned, roll back and use byte copy.
	Save ar.ec on entry and restore on exit.
Diffstat (limited to 'sysdeps/ia64/memccpy.S')
-rw-r--r--sysdeps/ia64/memccpy.S52
1 files changed, 45 insertions, 7 deletions
diff --git a/sysdeps/ia64/memccpy.S b/sysdeps/ia64/memccpy.S
index 7ffe9b9f38..6bccb96b70 100644
--- a/sysdeps/ia64/memccpy.S
+++ b/sysdeps/ia64/memccpy.S
@@ -1,6 +1,6 @@
 /* Optimized version of the memccpy() function.
    This file is part of the GNU C Library.
-   Copyright (C) 2000, 2001 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
    Contributed by Dan Pop <Dan.Pop@cern.ch>.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -46,6 +46,7 @@
 #define tmp		r23
 #define char		r24
 #define charx8		r25
+#define saved_ec	r26
 #define sh2		r28
 #define	sh1		r29
 #define loopcnt		r30
@@ -56,25 +57,27 @@ ENTRY(memccpy)
 	alloc 	r2 = ar.pfs, 4, 40 - 4, 0, 40
 
 #include "softpipe.h"
-	.rotr	r[MEMLAT + 3], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2]
+	.rotr	r[MEMLAT + 7], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2]
 	.rotp	p[MEMLAT + 6 + 1]
 
 	mov	ret0 = r0		// return NULL if no match
 	.save pr, saved_pr
 	mov	saved_pr = pr		// save the predicate registers
+	mov 	dest = in0		// dest
 	.save ar.lc, saved_lc
         mov 	saved_lc = ar.lc	// save the loop counter
+        mov 	saved_ec = ar.ec	// save the loop counter
 	.body
-	mov 	dest = in0		// dest
 	mov 	src = in1		// src
 	extr.u	char = in2, 0, 8	// char
 	mov	len = in3		// len
 	sub	tmp = r0, in0		// tmp = -dest
 	cmp.ne	p7, p0 = r0, r0		// clear p7
 	;;
-	and	loopcnt = 7, tmp		// loopcnt = -dest % 8
+	and	loopcnt = 7, tmp	// loopcnt = -dest % 8
 	cmp.ge	p6, p0 = OP_T_THRES, len	// is len <= OP_T_THRES
-(p6)	br.cond.spnt	.cpyfew			// copy byte by byte
+	mov	ar.ec = 0		// ec not guaranteed zero on entry
+(p6)	br.cond.spnt	.cpyfew		// copy byte by byte
 	;;
 	cmp.eq	p6, p0 = loopcnt, r0
 	mux1	charx8 = char, @brcst
@@ -109,26 +112,31 @@ ENTRY(memccpy)
 	cmp.ne	p6, p0 = r0, r0	;;	// clear p6
 	.align	32
 .l2:
-(p[0])		ld8	r[0] = [asrc], 8		// r[0] = w1
+(p[0])		ld8.s	r[0] = [asrc], 8		// r[0] = w1
 (p[MEMLAT])	shr.u	tmp1[0] = r[1 + MEMLAT], sh1	// tmp1 = w0 >> sh1
 (p[MEMLAT])	shl	tmp2[0] = r[0 + MEMLAT], sh2  	// tmp2 = w1 << sh2
 (p[MEMLAT+4])	xor	tmp3[0] = val[1], charx8
 (p[MEMLAT+5])	czx1.r	pos0[0] = tmp3[1]
+(p[MEMLAT+6])	chk.s	r[6 + MEMLAT], .recovery1	// our data isn't
+							// valid - rollback!
 (p[MEMLAT+6])	cmp.ne	p6, p0 = 8, pos0[1]
 (p6)		br.cond.spnt	.gotit
 (p[MEMLAT+6])	st8	[dest] = val[3], 8		// store val to dest
 (p[MEMLAT+3])	or	val[0] = tmp1[3], tmp2[3] 	// val = tmp1 | tmp2
 		br.ctop.sptk    .l2
 		br.cond.sptk .cpyfew
+
 .src_aligned:
 		cmp.ne  p6, p0 = r0, r0			// clear p6
 		mov     ar.ec = MEMLAT + 2 + 1 ;;	// set EC
 .l3:
-(p[0])		ld8	r[0] = [src], 8
+(p[0])		ld8.s	r[0] = [src], 8
 (p[MEMLAT])	xor	tmp3[0] = r[MEMLAT], charx8
 (p[MEMLAT+1])	czx1.r	pos0[0] = tmp3[1]
 (p[MEMLAT+2])	cmp.ne	p7, p0 = 8, pos0[1]
+(p[MEMLAT+2])	chk.s	r[MEMLAT+2], .recovery2
 (p7)		br.cond.spnt	.gotit
+.back2:
 (p[MEMLAT+2])	st8	[dest] = r[MEMLAT+2], 8
 		br.ctop.dptk .l3
 .cpyfew:
@@ -148,6 +156,7 @@ ENTRY(memccpy)
 .restore_and_exit:
 	mov     pr = saved_pr, -1    	// restore the predicate registers
 	mov 	ar.lc = saved_lc	// restore the loop counter
+	mov 	ar.ec = saved_ec ;;	// restore the epilog counter
 	br.ret.sptk.many b0
 .gotit:
 	.pred.rel "mutex" p6, p7
@@ -163,4 +172,33 @@ ENTRY(memccpy)
 	mov	pr = saved_pr, -1
 	mov	ar.lc = saved_lc
 	br.ret.sptk.many b0
+
+.recovery1:
+	adds	src = -(MEMLAT + 6 + 1) * 8, asrc
+	mov	loopcnt = ar.lc
+	mov	tmp = ar.ec ;;
+	sub	sh1 = (MEMLAT + 6 + 1), tmp
+	shr.u	sh2 = sh2, 3
+	;; 
+	shl	loopcnt = loopcnt, 3
+	sub	src = src, sh2
+	shl	sh1 = sh1, 3
+	shl	tmp = tmp, 3
+	;;
+	add	len = len, loopcnt
+	add	src = sh1, src ;;
+	add	len = tmp, len
+.back1:
+	br.cond.sptk .cpyfew
+
+.recovery2:
+	add	tmp = -(MEMLAT + 3) * 8, src
+(p7)	br.cond.spnt .gotit
+	;;
+	ld8	r[MEMLAT+2] = [tmp] ;;
+	xor	pos0[1] = r[MEMLAT+2], charx8 ;;
+	czx1.r	pos0[1] = pos0[1] ;;
+	cmp.ne	p7, p6 = 8, pos0[1]
+(p7)	br.cond.spnt .gotit
+	br.cond.sptk .back2
 END(memccpy)