about summary refs log tree commit diff
path: root/src/string/i386/memset.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/string/i386/memset.s')
-rw-r--r-- src/string/i386/memset.s | 93
1 files changed, 61 insertions, 32 deletions
diff --git a/src/string/i386/memset.s b/src/string/i386/memset.s
index 06ac923e..d00422c4 100644
--- a/src/string/i386/memset.s
+++ b/src/string/i386/memset.s
@@ -1,47 +1,76 @@
 .global memset
 .type memset,@function
 memset:
-	mov 8(%esp),%al
-	push %edi
-	mov %al,%ah
-	mov %al,%dl
-	mov 16(%esp),%ecx
-	shl $16,%eax
-	mov 8(%esp),%edi
-	mov %dl,%al
-	mov %dl,%ah
-	cmp $16,%ecx
-	jb 1f
+	mov 12(%esp),%ecx	# ecx = n; stack args per C memset(dest, c, n): 4(%esp)=dest, 8(%esp)=c, 12(%esp)=n
+	cmp $62,%ecx	# sizes 0..62 use the straight-line overlapping stores below
+	ja 2f	# n > 62 (unsigned): bulk rep stosl path at the next 2: label
 
-	mov %eax,-4(%edi,%ecx)
-	shr $2,%ecx
-	rep
-	stosl
-	mov 8(%esp),%eax
-	pop %edi
-	ret
-
-1:	test %ecx,%ecx
+	mov 8(%esp),%dl	# dl = fill byte (low byte of c)
+	mov 4(%esp),%eax	# eax = dest; never modified on this path, so it is also the return value
+	test %ecx,%ecx	# n == 0: nothing to store
 	jz 1f
 
-	mov %al,(%edi)
-	mov %al,-1(%edi,%ecx)
+	mov %dl,%dh	# dx = fill byte replicated twice
+
+	mov %dl,(%eax)	# byte 0
+	mov %dl,-1(%eax,%ecx)	# byte n-1 (same byte as above when n == 1); n <= 2 is now fully written
 	cmp $2,%ecx
 	jbe 1f
 
-	mov %al,1(%edi)
-	mov %al,-2(%edi,%ecx)
-	cmp $4,%ecx
+	mov %dx,1(%eax)	# bytes 1..2
+	mov %dx,(-1-2)(%eax,%ecx)	# bytes n-3..n-2
+	cmp $6,%ecx	# n <= 6: head 0..2 plus tail n-3..n-1 already cover the buffer
 	jbe 1f
 
-	mov %eax,(%edi)
-	mov %eax,-4(%edi,%ecx)
-	cmp $8,%ecx
+	shl $16,%edx	# move the two fill bytes to the upper half; low 16 bits become zero
+	mov 8(%esp),%dl	# reload the fill byte into dl ...
+	mov 8(%esp),%dh	# ... and dh: edx = fill byte replicated four times
+
+	mov %edx,(1+2)(%eax)	# bytes 3..6
+	mov %edx,(-1-2-4)(%eax,%ecx)	# bytes n-7..n-4
+	cmp $14,%ecx	# n <= 14: head 0..6 plus tail n-7..n-1 cover the buffer
 	jbe 1f
 
-	mov %eax,4(%edi)
-	mov %eax,-8(%edi,%ecx)
+	mov %edx,(1+2+4)(%eax)	# bytes 7..10
+	mov %edx,(1+2+4+4)(%eax)	# bytes 11..14
+	mov %edx,(-1-2-4-8)(%eax,%ecx)	# bytes n-15..n-12
+	mov %edx,(-1-2-4-4)(%eax,%ecx)	# bytes n-11..n-8
+	cmp $30,%ecx	# n <= 30: head 0..14 plus tail n-15..n-1 cover the buffer
+	jbe 1f
+
+	mov %edx,(1+2+4+8)(%eax)	# bytes 15..18
+	mov %edx,(1+2+4+8+4)(%eax)	# bytes 19..22
+	mov %edx,(1+2+4+8+8)(%eax)	# bytes 23..26
+	mov %edx,(1+2+4+8+12)(%eax)	# bytes 27..30
+	mov %edx,(-1-2-4-8-16)(%eax,%ecx)	# bytes n-31..n-28
+	mov %edx,(-1-2-4-8-12)(%eax,%ecx)	# bytes n-27..n-24
+	mov %edx,(-1-2-4-8-8)(%eax,%ecx)	# bytes n-23..n-20
+	mov %edx,(-1-2-4-8-4)(%eax,%ecx)	# bytes n-19..n-16; with n <= 62 head 0..30 and tail n-31..n-1 meet
+
+1:	ret 	# small path done; eax still holds dest
+
+2:	movzbl 8(%esp),%eax	# bulk path (n > 62): eax = fill byte, zero-extended
+	mov %edi,12(%esp)	# save edi in the n argument's stack slot (n already lives in ecx); restored before ret
+	imul $0x1010101,%eax	# replicate the byte into all four bytes of eax
+	mov 4(%esp),%edi	# edi = dest, the store pointer for rep stosl
+	test $15,%edi	# is dest 16-byte aligned? result is consumed by the jnz two lines down (mov leaves flags alone)
+	mov %eax,-4(%edi,%ecx)	# write the last 4 bytes up front: rep stosl only stores whole dwords, this covers the remainder
+	jnz 2f	# unaligned dest: run the alignment fix-up first
 
-1:	mov 8(%esp),%eax
-	pop %edi
+1:	shr $2, %ecx	# ecx = number of whole dwords left to store
+	rep
+	stosl	# store eax at (%edi) ecx times, advancing edi (relies on the direction flag being clear -- TODO confirm ABI guarantee)
+	mov 4(%esp),%eax	# return value = original dest
+	mov 12(%esp),%edi	# restore the caller's edi
 	ret
+	
+2:	xor %edx,%edx	# alignment fix-up: compute edx = (-dest) & 15
+	sub %edi,%edx	# edx = -dest
+	and $15,%edx	# edx = bytes up to the next 16-byte boundary (1..15 on this path)
+	mov %eax,(%edi)	# fill the first 16 bytes with four unaligned dword stores;
+	mov %eax,4(%edi)	# n > 62 guarantees they lie inside the buffer
+	mov %eax,8(%edi)
+	mov %eax,12(%edi)
+	sub %edx,%ecx	# drop the skipped bytes from the remaining count
+	add %edx,%edi	# edi now points at the 16-byte boundary (bytes before it are already written)
+	jmp 1b	# continue with the rep stosl loop above