about summary refs log tree commit diff
path: root/sysdeps/ia64/memccpy.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/ia64/memccpy.S')
-rw-r--r-- sysdeps/ia64/memccpy.S 52
1 files changed, 45 insertions, 7 deletions
diff --git a/sysdeps/ia64/memccpy.S b/sysdeps/ia64/memccpy.S
index 7ffe9b9f38..6bccb96b70 100644
--- a/sysdeps/ia64/memccpy.S
+++ b/sysdeps/ia64/memccpy.S
@@ -1,6 +1,6 @@
 /* Optimized version of the memccpy() function.
    This file is part of the GNU C Library.
-   Copyright (C) 2000, 2001 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
    Contributed by Dan Pop <Dan.Pop@cern.ch>.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -46,6 +46,7 @@
 #define tmp		r23
 #define char		r24
 #define charx8		r25
+#define saved_ec	r26
 #define sh2		r28
 #define	sh1		r29
 #define loopcnt		r30
@@ -56,25 +57,27 @@ ENTRY(memccpy)
 	alloc 	r2 = ar.pfs, 4, 40 - 4, 0, 40
 
 #include "softpipe.h"
-	.rotr	r[MEMLAT + 3], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2]
+	.rotr	r[MEMLAT + 7], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2]
 	.rotp	p[MEMLAT + 6 + 1]
 
 	mov	ret0 = r0		// return NULL if no match
 	.save pr, saved_pr
 	mov	saved_pr = pr		// save the predicate registers
+	mov 	dest = in0		// dest
 	.save ar.lc, saved_lc
         mov 	saved_lc = ar.lc	// save the loop counter
+        mov 	saved_ec = ar.ec	// save the epilog counter
 	.body
-	mov 	dest = in0		// dest
 	mov 	src = in1		// src
 	extr.u	char = in2, 0, 8	// char
 	mov	len = in3		// len
 	sub	tmp = r0, in0		// tmp = -dest
 	cmp.ne	p7, p0 = r0, r0		// clear p7
 	;;
-	and	loopcnt = 7, tmp		// loopcnt = -dest % 8
+	and	loopcnt = 7, tmp	// loopcnt = -dest % 8
 	cmp.ge	p6, p0 = OP_T_THRES, len	// is len <= OP_T_THRES
-(p6)	br.cond.spnt	.cpyfew			// copy byte by byte
+	mov	ar.ec = 0		// ec not guaranteed zero on entry
+(p6)	br.cond.spnt	.cpyfew		// copy byte by byte
 	;;
 	cmp.eq	p6, p0 = loopcnt, r0
 	mux1	charx8 = char, @brcst
@@ -109,26 +112,31 @@ ENTRY(memccpy)
 	cmp.ne	p6, p0 = r0, r0	;;	// clear p6
 	.align	32
 .l2:
-(p[0])		ld8	r[0] = [asrc], 8		// r[0] = w1
+(p[0])		ld8.s	r[0] = [asrc], 8		// r[0] = w1
 (p[MEMLAT])	shr.u	tmp1[0] = r[1 + MEMLAT], sh1	// tmp1 = w0 >> sh1
 (p[MEMLAT])	shl	tmp2[0] = r[0 + MEMLAT], sh2  	// tmp2 = w1 << sh2
 (p[MEMLAT+4])	xor	tmp3[0] = val[1], charx8
 (p[MEMLAT+5])	czx1.r	pos0[0] = tmp3[1]
+(p[MEMLAT+6])	chk.s	r[6 + MEMLAT], .recovery1	// our data isn't
+							// valid - rollback!
 (p[MEMLAT+6])	cmp.ne	p6, p0 = 8, pos0[1]
 (p6)		br.cond.spnt	.gotit
 (p[MEMLAT+6])	st8	[dest] = val[3], 8		// store val to dest
 (p[MEMLAT+3])	or	val[0] = tmp1[3], tmp2[3] 	// val = tmp1 | tmp2
 		br.ctop.sptk    .l2
 		br.cond.sptk .cpyfew
+
 .src_aligned:
 		cmp.ne  p6, p0 = r0, r0			// clear p6
 		mov     ar.ec = MEMLAT + 2 + 1 ;;	// set EC
 .l3:
-(p[0])		ld8	r[0] = [src], 8
+(p[0])		ld8.s	r[0] = [src], 8
 (p[MEMLAT])	xor	tmp3[0] = r[MEMLAT], charx8
 (p[MEMLAT+1])	czx1.r	pos0[0] = tmp3[1]
 (p[MEMLAT+2])	cmp.ne	p7, p0 = 8, pos0[1]
+(p[MEMLAT+2])	chk.s	r[MEMLAT+2], .recovery2
 (p7)		br.cond.spnt	.gotit
+.back2:
 (p[MEMLAT+2])	st8	[dest] = r[MEMLAT+2], 8
 		br.ctop.dptk .l3
 .cpyfew:
@@ -148,6 +156,7 @@ ENTRY(memccpy)
 .restore_and_exit:
 	mov     pr = saved_pr, -1    	// restore the predicate registers
 	mov 	ar.lc = saved_lc	// restore the loop counter
+	mov 	ar.ec = saved_ec ;;	// restore the epilog counter
 	br.ret.sptk.many b0
 .gotit:
 	.pred.rel "mutex" p6, p7
@@ -163,4 +172,33 @@ ENTRY(memccpy)
 	mov	pr = saved_pr, -1
 	mov	ar.lc = saved_lc
 	br.ret.sptk.many b0
+
+.recovery1:				// target of the chk.s in .l2: the ld8.s data was not valid
+	adds	src = -(MEMLAT + 6 + 1) * 8, asrc	// back asrc up by 8 bytes per p[] stage
+	mov	loopcnt = ar.lc		// loop count at the time of the failure
+	mov	tmp = ar.ec ;;		// epilog count at the time of the failure
+	sub	sh1 = (MEMLAT + 6 + 1), tmp	// sh1 = (MEMLAT + 6 + 1) - ec
+	shr.u	sh2 = sh2, 3		// sh2 /= 8; presumably bits -> bytes (sh2 setup not visible here - TODO confirm)
+	;; 
+	shl	loopcnt = loopcnt, 3	// lc * 8
+	sub	src = src, sh2		// src -= sh2
+	shl	sh1 = sh1, 3		// sh1 * 8
+	shl	tmp = tmp, 3		// ec * 8
+	;;
+	add	len = len, loopcnt	// len += lc * 8
+	add	src = sh1, src ;;	// src += ((MEMLAT + 6 + 1) - ec) * 8
+	add	len = tmp, len		// len += ec * 8
+.back1:
+	br.cond.sptk .cpyfew		// continue on the byte-by-byte path with the adjusted src/len
+
+.recovery2:				// target of the chk.s in .l3
+	add	tmp = -(MEMLAT + 3) * 8, src	// tmp = src - (MEMLAT + 3) * 8, address for the reload below
+(p7)	br.cond.spnt .gotit		// p7 already set: go straight to .gotit
+	;;
+	ld8	r[MEMLAT+2] = [tmp] ;;	// plain ld8 (not ld8.s) into the register the chk.s tested
+	xor	pos0[1] = r[MEMLAT+2], charx8 ;;	// redo the .l3 test sequence: xor with charx8 ...
+	czx1.r	pos0[1] = pos0[1] ;;	// ... then czx1.r on the result
+	cmp.ne	p7, p6 = 8, pos0[1]	// p7 = (pos0[1] != 8), p6 = the complement
+(p7)	br.cond.spnt .gotit		// same exit condition as in .l3
+	br.cond.sptk .back2		// otherwise resume .l3 at the st8 after .back2
 END(memccpy)