diff options
author | Stefan Liebler <stli@linux.ibm.com> | 2020-06-26 09:45:11 +0200 |
---|---|---|
committer | Stefan Liebler <stli@linux.ibm.com> | 2020-06-26 09:45:11 +0200 |
commit | 0792c8ae1aebf538de45ff9a0e2e401a60525de2 (patch) | |
tree | 7928bae9286828c8c9d69f7418299ff9143abb51 | |
parent | 2034c70e64b31e48140c8e31c5ae839af5ccb6eb (diff) | |
download | glibc-0792c8ae1aebf538de45ff9a0e2e401a60525de2.tar.gz glibc-0792c8ae1aebf538de45ff9a0e2e401a60525de2.tar.xz glibc-0792c8ae1aebf538de45ff9a0e2e401a60525de2.zip |
S390: Optimize __memcpy_z196.
This patch introduces an extra loop without pfd instructions, as it turned out that the pfd instructions are useful for copies >=64KB but are counterproductive for smaller copies.
-rw-r--r-- | sysdeps/s390/memcpy-z900.S | 21 |
1 files changed, 15 insertions, 6 deletions
diff --git a/sysdeps/s390/memcpy-z900.S b/sysdeps/s390/memcpy-z900.S index f2e9aaeb2d..dc2f491ec3 100644 --- a/sysdeps/s390/memcpy-z900.S +++ b/sysdeps/s390/memcpy-z900.S @@ -184,25 +184,34 @@ ENTRY(MEMCPY_Z196) je .L_Z196_4 .L_Z196_start2: aghi %r4,-1 - srlg %r5,%r4,8 - ltgr %r5,%r5 + risbg %r5,%r4,8,128+63,56 # r0 = r5 / 256 jne .L_Z196_5 .L_Z196_3: exrl %r4,.L_Z196_14 .L_Z196_4: br %r14 .L_Z196_5: - cgfi %r5,262144 # Switch to mvcle for copies >64MB - jh __memcpy_mvcle + cgfi %r5,255 # Switch to loop with pfd for copies >=64kB + jh .L_Z196_6 .L_Z196_2: - pfd 1,768(%r3) - pfd 2,768(%r1) mvc 0(256,%r1),0(%r3) aghi %r5,-1 la %r1,256(%r1) la %r3,256(%r3) jne .L_Z196_2 j .L_Z196_3 +.L_Z196_6: + cgfi %r5,262144 # Switch to mvcle for copies >64MB + jh __memcpy_mvcle +.L_Z196_7: + pfd 1,1024(%r3) + pfd 2,1024(%r1) + mvc 0(256,%r1),0(%r3) + aghi %r5,-1 + la %r1,256(%r1) + la %r3,256(%r3) + jne .L_Z196_7 + j .L_Z196_3 .L_Z196_14: mvc 0(1,%r1),0(%r3) END(MEMCPY_Z196) |