about summary refs log tree commit diff
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2017-10-27 02:31:13 -0700
committer H.J. Lu <hjl.tools@gmail.com> 2017-10-30 05:43:24 -0700
commit 9a89973274f901f4c3313e6e2b84d6b2108c7924 (patch)
tree 46dec892b25c138467c6fc7109eb64392b94bf50
parent dadf23f06c134d09b5930e09bddfdab2c0ca6150 (diff)
download glibc-9a89973274f901f4c3313e6e2b84d6b2108c7924.tar.gz
glibc-9a89973274f901f4c3313e6e2b84d6b2108c7924.tar.xz
glibc-9a89973274f901f4c3313e6e2b84d6b2108c7924.zip
i586: Use a jump table in strcpy.S [BZ #22353]
i586 strcpy.S used a clever trick with LEA to avoid a jump table:

/* ECX has the last 2 bits of the address of source - 1.  */
	andl	$3, %ecx

        call    2f
2:      popl    %edx
	/* 0xb is the distance between 2: and 1:.  */
        leal    0xb(%edx,%ecx,8), %ecx
        jmp     *%ecx

        .align 8
1:  /* ECX == 0 */
        orb     (%esi), %al
        jz      L(end)
        stosb
        xorl    %eax, %eax
        incl    %esi
    /* ECX == 1 */
        orb     (%esi), %al
        jz      L(end)
        stosb
        xorl    %eax, %eax
        incl    %esi
    /* ECX == 2 */
        orb     (%esi), %al
        jz      L(end)
        stosb
        xorl    %eax, %eax
        incl    %esi
    /* ECX == 3 */
L(1):   movl    (%esi), %ecx
        leal    4(%esi),%esi

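This relies on each of the three byte-copy blocks being exactly 8 bytes
long and on the distance between 2: and 1: being exactly 0xb, so it may
fail if any instruction length changes before L(1):.  This patch replaces
it with a jump table, which works regardless of instruction lengths.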

Tested on i586 and i686 with and without --disable-multi-arch.

	[BZ #22353]
	* sysdeps/i386/i586/strcpy.S (JMPTBL): New.
	(BRANCH_TO_JMPTBL_ENTRY): Likewise.
	(STRCPY): Use it.
	(1): Renamed to ...
	(L(Src0)): This.
	(L(Src1)): New.
	(L(Src2)): Likewise.
	(L(1)): Renamed to ...
	(L(Src3)): This.
	(L(SrcTable)): New.
-rw-r--r--sysdeps/i386/i586/strcpy.S62
1 files changed, 45 insertions, 17 deletions
diff --git a/sysdeps/i386/i586/strcpy.S b/sysdeps/i386/i586/strcpy.S
index a444604f4f..93842429da 100644
--- a/sysdeps/i386/i586/strcpy.S
+++ b/sysdeps/i386/i586/strcpy.S
@@ -29,6 +29,34 @@
 # define STRCPY strcpy
 #endif
 
+#ifdef PIC
+# define JMPTBL(I, B)	I - B
+
+/* Load an entry in a jump table into EDX and branch to it.  TABLE is a
+   jump table of offsets relative to TABLE itself.  INDEX is a register
+   that contains the index into the table.  SCALE is the scale of INDEX.  */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
+	/* We first load PC into EDX.  */			\
+	SETUP_PIC_REG(dx);					\
+	/* Get the address of the jump table.  */		\
+	addl	$(TABLE - .), %edx;				\
+	/* Get the entry and convert the relative offset to the \
+	   absolute address.  */				\
+	addl	(%edx,INDEX,SCALE), %edx;			\
+	/* EDX now holds the absolute target address.  Go.  */	\
+	jmp	*%edx
+#else
+# define JMPTBL(I, B)	I
+
+/* Branch to an entry in a jump table.  TABLE is a jump table with
+   absolute offsets.  INDEX is a register that contains the index into
+   the jump table.  SCALE is the scale of INDEX.  */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
+	jmp	*TABLE(,INDEX,SCALE)
+#endif
+
 #define magic 0xfefefeff
 
 	.text
@@ -53,41 +81,32 @@ ENTRY (STRCPY)
 	cfi_rel_offset (ebx, 0)
 	andl	$3, %ecx
 
-#ifdef PIC
-	call	2f
-	cfi_adjust_cfa_offset (4)
-2:	popl	%edx
-	cfi_adjust_cfa_offset (-4)
-	/* 0xb is the distance between 2: and 1: but we avoid writing
-	   1f-2b because the assembler generates worse code.  */
-	leal	0xb(%edx,%ecx,8), %ecx
-#else
-	leal	1f(,%ecx,8), %ecx
-#endif
-
-	jmp	*%ecx
+	BRANCH_TO_JMPTBL_ENTRY (L(SrcTable), %ecx, 4)
 
-	.align 8
-1:
+	.p2align 4
+L(Src0):
 	orb	(%esi), %al
 	jz	L(end)
 	stosb
 	xorl	%eax, %eax
 	incl	%esi
 
+L(Src1):
 	orb	(%esi), %al
 	jz	L(end)
 	stosb
 	xorl	%eax, %eax
 	incl	%esi
 
+L(Src2):
 	orb	(%esi), %al
 	jz	L(end)
 	stosb
 	xorl	%eax, %eax
 	incl	%esi
 
-L(1):	movl	(%esi), %ecx
+L(Src3):
+	movl	(%esi), %ecx
 	leal	4(%esi),%esi
 
 	subl	%ecx, %eax
@@ -107,7 +126,7 @@ L(1):	movl	(%esi), %ecx
 	movl	%edx, (%edi)
 	leal	4(%edi),%edi
 
-	jmp	L(1)
+	jmp	L(Src3)
 
 L(3):	movl	%ecx, %edx
 
@@ -164,6 +183,15 @@ L(end2):
 
 	ret
 END (STRCPY)
+
+	.section .rodata
+	.p2align 2	/* Align the table itself; must follow the section switch.  */
+L(SrcTable):	/* Entry N is the branch target when the index (ECX) is N.  */
+	.int	JMPTBL (L(Src0), L(SrcTable))
+	.int	JMPTBL (L(Src1), L(SrcTable))
+	.int	JMPTBL (L(Src2), L(SrcTable))
+	.int	JMPTBL (L(Src3), L(SrcTable))
+
 #ifndef USE_AS_STPCPY
 libc_hidden_builtin_def (strcpy)
 #endif