author     Florian Weimer <fweimer@redhat.com>	2017-06-23 17:23:44 +0200
committer  Florian Weimer <fweimer@redhat.com>	2017-06-23 17:24:40 +0200
commit     3ec7c02cc3e922b9364dc8cfd1d4546671b91003
tree       283d54448fe89359272093156316884e61992c9a /sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
parent     7fa1d9462baabc5a1058efc13a48444af4678acf
x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]
This code:

L(between_2_3):
	/* Load as big endian with overlapping loads and bswap to avoid
	   branches.  */
	movzwl	-2(%rdi, %rdx), %eax
	movzwl	-2(%rsi, %rdx), %ecx
	shll	$16, %eax
	shll	$16, %ecx
	movzwl	(%rdi), %edi
	movzwl	(%rsi), %esi
	orl	%edi, %eax
	orl	%esi, %ecx
	bswap	%eax
	bswap	%ecx
	subl	%ecx, %eax
	ret

needs a saturating subtraction because the full register is used.
With this commit, only the lower 24 bits of the register are used, so
a regular subtraction suffices.

The test case change adds coverage for these kinds of bugs.
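For illustration, here is a minimal standalone C sketch of the bug
(not part of the commit; the driver and the byte values are
hypothetical).  When both operands occupy all 32 bits, as in the old
sequence above, the 32-bit difference can wrap and yield the wrong
sign; when the operands are confined to the low 24 bits, the
difference always fits in a signed int and keeps the correct sign.

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  /* Full 32 bits used (old code): a < b as unsigned big-endian
     values, e.g. inputs {0,0,0} vs. {0x80,0x00,0x01}, but the
     wrapped difference is positive, so memcmp would wrongly
     report a > b.  */
  uint32_t a = 0x00000000;
  uint32_t b = 0x80000001;
  printf ("32-bit operands: (int) (a - b) = %d\n", (int) (a - b));

  /* Only the low 24 bits used (new code): both values are below
     2^24, so the difference always fits in a signed 32-bit int
     and its sign is always correct.  */
  uint32_t c = 0x00000000;
  uint32_t d = 0x00800001;
  printf ("24-bit operands: (int) (c - d) = %d\n", (int) (c - d));
  return 0;
}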
Diffstat (limited to 'sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S')
 sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
index 47630dd97b..9d1921033e 100644
--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
@@ -137,18 +137,18 @@ L(exit):
 	.p2align 4
 L(between_2_3):
-	/* Load as big endian with overlapping loads and bswap to avoid
-	   branches.  */
-	movzwl	-2(%rdi, %rdx), %eax
-	movzwl	-2(%rsi, %rdx), %ecx
-	shll	$16, %eax
-	shll	$16, %ecx
-	movzwl	(%rdi), %edi
-	movzwl	(%rsi), %esi
-	orl	%edi, %eax
-	orl	%esi, %ecx
+	/* Load as big endian to avoid branches.  */
+	movzwl	(%rdi), %eax
+	movzwl	(%rsi), %ecx
+	shll	$8, %eax
+	shll	$8, %ecx
 	bswap	%eax
 	bswap	%ecx
+	movzbl	-1(%rdi, %rdx), %edi
+	movzbl	-1(%rsi, %rdx), %esi
+	orl	%edi, %eax
+	orl	%esi, %ecx
+	/* Subtraction is okay because the upper 8 bits are zero.  */
 	subl	%ecx, %eax
 	ret
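For reference, a C rendition of the new load scheme for lengths 2
and 3 (a sketch only; cmp_2_3 and the driver below are hypothetical
names, not glibc code).  It shows why both composed values stay
below 2^24, which is what makes the plain subl safe.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Mirror of the new sequence: movzwl/shll $8/bswap builds the
   two-byte big-endian prefix in bits 23..8, and movzbl/orl places
   the last byte in bits 7..0.  For n == 2 the last byte duplicates
   byte 1, which does not change the comparison result.  */
static int
cmp_2_3 (const unsigned char *p1, const unsigned char *p2, size_t n)
{
  uint32_t a = ((uint32_t) p1[0] << 16) | ((uint32_t) p1[1] << 8);
  uint32_t b = ((uint32_t) p2[0] << 16) | ((uint32_t) p2[1] << 8);
  a |= p1[n - 1];
  b |= p2[n - 1];
  /* Both values are below 2^24 (the upper 8 bits are zero), so the
     subtraction cannot wrap and the sign is always correct.  */
  return (int) a - (int) b;
}

int
main (void)
{
  const unsigned char x[3] = { 0x00, 0x00, 0x00 };
  const unsigned char y[3] = { 0x80, 0x00, 0x01 };
  /* Prints a negative value, matching memcmp (x, y, 3) < 0; the old
     full-register sequence got the sign wrong on inputs like these.  */
  printf ("cmp_2_3 = %d\n", cmp_2_3 (x, y, 3));
  return 0;
}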