about summary refs log tree commit diff
path: root/sysdeps/i386
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2017-10-30 10:02:16 -0700
committer H.J. Lu <hjl.tools@gmail.com> 2017-10-30 10:02:30 -0700
commit c5cc45148c89cc5c57d1946348dd242d4db5c5f5 (patch)
tree ceee1e4fcc30daa0d3716353d5ffd6ac587508e1 /sysdeps/i386
parent ce12269fac8cb873df1a8785e4a6cde870855590 (diff)
download glibc-c5cc45148c89cc5c57d1946348dd242d4db5c5f5.tar.gz
glibc-c5cc45148c89cc5c57d1946348dd242d4db5c5f5.tar.xz
glibc-c5cc45148c89cc5c57d1946348dd242d4db5c5f5.zip
i586: Use conditional branches in strcpy.S [BZ #22353]
i586 strcpy.S used a clever trick with LEA to implement a jump table:

/* ECX has the last 2 bits of the address of source - 1.  */
	andl	$3, %ecx

        call    2f
2:      popl    %edx
	/* 0xb is the distance between 2: and 1:.  */
        leal    0xb(%edx,%ecx,8), %ecx
        jmp     *%ecx

        .align 8
1:  /* ECX == 0 */
        orb     (%esi), %al
        jz      L(end)
        stosb
        xorl    %eax, %eax
        incl    %esi
    /* ECX == 1 */
        orb     (%esi), %al
        jz      L(end)
        stosb
        xorl    %eax, %eax
        incl    %esi
    /* ECX == 2 */
        orb     (%esi), %al
        jz      L(end)
        stosb
        xorl    %eax, %eax
        incl    %esi
    /* ECX == 3 */
L(1):   movl    (%esi), %ecx
        leal    4(%esi),%esi

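This fails if the length of any instruction before L(1): changes, because
the computed jump assumes that 1: is exactly 0xb bytes after 2: and that
each case is exactly 8 bytes long.  This patch replaces it with
conditional branches: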

	cmpb	$2, %cl
	je	L(Src2)
	ja	L(Src3)
	cmpb	$1, %cl
	je	L(Src1)

L(Src0):

which have similar performance and work with any instruction lengths.

Tested on i586 and i686 with and without --disable-multi-arch.

	[BZ #22353]
	* sysdeps/i386/i586/strcpy.S (STRCPY): Use conditional branches.
	(1): Renamed to ...
	(L(Src0)): This.
	(L(Src1)): New.
	(L(Src2)): Likewise.
	(L(1)): Renamed to ...
	(L(Src3)): This.
Diffstat (limited to 'sysdeps/i386')
-rw-r--r-- sysdeps/i386/i586/strcpy.S 28
1 files changed, 11 insertions, 17 deletions
diff --git a/sysdeps/i386/i586/strcpy.S b/sysdeps/i386/i586/strcpy.S
index a444604f4f..bb73ca4ef3 100644
--- a/sysdeps/i386/i586/strcpy.S
+++ b/sysdeps/i386/i586/strcpy.S
@@ -53,41 +53,35 @@ ENTRY (STRCPY)
 	cfi_rel_offset (ebx, 0)
 	andl	$3, %ecx
 
-#ifdef PIC
-	call	2f
-	cfi_adjust_cfa_offset (4)
-2:	popl	%edx
-	cfi_adjust_cfa_offset (-4)
-	/* 0xb is the distance between 2: and 1: but we avoid writing
-	   1f-2b because the assembler generates worse code.  */
-	leal	0xb(%edx,%ecx,8), %ecx
-#else
-	leal	1f(,%ecx,8), %ecx
-#endif
-
-	jmp	*%ecx
+	cmpb	$2, %cl
+	je	L(Src2)
+	ja	L(Src3)
+	cmpb	$1, %cl
+	je	L(Src1)
 
-	.align 8
-1:
+L(Src0):
 	orb	(%esi), %al
 	jz	L(end)
 	stosb
 	xorl	%eax, %eax
 	incl	%esi
 
+L(Src1):
 	orb	(%esi), %al
 	jz	L(end)
 	stosb
 	xorl	%eax, %eax
 	incl	%esi
 
+L(Src2):
 	orb	(%esi), %al
 	jz	L(end)
 	stosb
 	xorl	%eax, %eax
 	incl	%esi
 
-L(1):	movl	(%esi), %ecx
+L(Src3):
+	movl	(%esi), %ecx
 	leal	4(%esi),%esi
 
 	subl	%ecx, %eax
@@ -107,7 +101,7 @@ L(1):	movl	(%esi), %ecx
 	movl	%edx, (%edi)
 	leal	4(%edi),%edi
 
-	jmp	L(1)
+	jmp	L(Src3)
 
 L(3):	movl	%ecx, %edx