diff options
author | Andrew Senkevich <andrew.senkevich@intel.com> | 2015-07-24 14:47:23 +0300 |
---|---|---|
committer | Andrew Senkevich <andrew.senkevich@intel.com> | 2015-07-24 14:47:23 +0300 |
commit | 99017161354321845d11dce4fcd3abfebc5dd0d5 (patch) | |
tree | 50c62fe44aef915a84b1eb5fb0ad787e39f5a210 /sysdeps/x86_64/fpu/multiarch | |
parent | 3bcea719ddd6ce399d7bccb492c40af77d216e42 (diff) | |
download | glibc-99017161354321845d11dce4fcd3abfebc5dd0d5.tar.gz glibc-99017161354321845d11dce4fcd3abfebc5dd0d5.tar.xz glibc-99017161354321845d11dce4fcd3abfebc5dd0d5.zip |
Fixed several libmvec bugs found during testing on KNL hardware.

Fixed the AVX512 IFUNC implementations, the implementations of the wrappers to the AVX2 versions, and the KNL expf implementation.

* sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S: Fixed AVX512 IFUNC.
* sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_wrapper_impl.h: Fixed wrappers to AVX2.
* sysdeps/x86_64/fpu/svml_s_wrapper_impl.h: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S: Fixed KNL implementation.

Diffstat (limited to 'sysdeps/x86_64/fpu/multiarch')
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S | 1 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S | 10 |
13 files changed, 61 insertions, 60 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S index ba3b66f69f..d0f4f27f46 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S @@ -23,16 +23,16 @@ ENTRY (_ZGVeN8v_cos) .type _ZGVeN8v_cos, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1 + jne 1f call __init_cpu_features 1: leaq _ZGVeN8v_cos_skx(%rip), %rax testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) - jnz 3 -2: leaq _ZGVeN8v_cos_knl(%rip), %rax + jnz 2f + leaq _ZGVeN8v_cos_knl(%rip), %rax testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) - jnz 3 + jnz 2f leaq _ZGVeN8v_cos_avx2_wrapper(%rip), %rax -3: ret +2: ret END (_ZGVeN8v_cos) #define _ZGVeN8v_cos _ZGVeN8v_cos_avx2_wrapper diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S index 8f837fbfb9..7b7c07d926 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S @@ -23,16 +23,16 @@ ENTRY (_ZGVeN8v_exp) .type _ZGVeN8v_exp, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1 + jne 1f call __init_cpu_features 1: leaq _ZGVeN8v_exp_skx(%rip), %rax testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) - jnz 3 -2: leaq _ZGVeN8v_exp_knl(%rip), %rax + jnz 2f + leaq _ZGVeN8v_exp_knl(%rip), %rax testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) - jnz 3 + jnz 2f leaq _ZGVeN8v_exp_avx2_wrapper(%rip), %rax -3: ret +2: ret END (_ZGVeN8v_exp) #define _ZGVeN8v_exp _ZGVeN8v_exp_avx2_wrapper diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S index 2f9e9d8892..76375fdae0 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S @@ -23,16 +23,16 
@@ ENTRY (_ZGVeN8v_log) .type _ZGVeN8v_log, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1 + jne 1f call __init_cpu_features 1: leaq _ZGVeN8v_log_skx(%rip), %rax testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) - jnz 3 -2: leaq _ZGVeN8v_log_knl(%rip), %rax + jnz 2f + leaq _ZGVeN8v_log_knl(%rip), %rax testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) - jnz 3 + jnz 2f leaq _ZGVeN8v_log_avx2_wrapper(%rip), %rax -3: ret +2: ret END (_ZGVeN8v_log) #define _ZGVeN8v_log _ZGVeN8v_log_avx2_wrapper diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S index 3b11511e51..c1e5e76f92 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S @@ -23,16 +23,16 @@ ENTRY (_ZGVeN8vv_pow) .type _ZGVeN8vv_pow, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1 + jne 1f call __init_cpu_features 1: leaq _ZGVeN8vv_pow_skx(%rip), %rax testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) - jnz 3 -2: leaq _ZGVeN8vv_pow_knl(%rip), %rax + jnz 2f + leaq _ZGVeN8vv_pow_knl(%rip), %rax testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) - jnz 3 + jnz 2f leaq _ZGVeN8vv_pow_avx2_wrapper(%rip), %rax -3: ret +2: ret END (_ZGVeN8vv_pow) #define _ZGVeN8vv_pow _ZGVeN8vv_pow_avx2_wrapper diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S index ba631020f3..131f2f47c5 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S @@ -23,16 +23,16 @@ ENTRY (_ZGVeN8v_sin) .type _ZGVeN8v_sin, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1 + jne 1f call __init_cpu_features 1: leaq _ZGVeN8v_sin_skx(%rip), %rax testl $bit_AVX512DQ_Usable, 
__cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) - jnz 3 -2: leaq _ZGVeN8v_sin_knl(%rip), %rax + jnz 2f + leaq _ZGVeN8v_sin_knl(%rip), %rax testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) - jnz 3 + jnz 2f leaq _ZGVeN8v_sin_avx2_wrapper(%rip), %rax -3: ret +2: ret END (_ZGVeN8v_sin) #define _ZGVeN8v_sin _ZGVeN8v_sin_avx2_wrapper diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S index 7228ba549a..e33109099e 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S @@ -23,16 +23,16 @@ ENTRY (_ZGVeN8vvv_sincos) .type _ZGVeN8vvv_sincos, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1 + jne 1f call __init_cpu_features 1: leaq _ZGVeN8vvv_sincos_skx(%rip), %rax testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) - jnz 3 -2: leaq _ZGVeN8vvv_sincos_knl(%rip), %rax + jnz 2f + leaq _ZGVeN8vvv_sincos_knl(%rip), %rax testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) - jnz 3 + jnz 2f leaq _ZGVeN8vvv_sincos_avx2_wrapper(%rip), %rax -3: ret +2: ret END (_ZGVeN8vvv_sincos) #define _ZGVeN8vvv_sincos _ZGVeN8vvv_sincos_avx2_wrapper diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S index 91564de22a..0654d3c19b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S @@ -23,16 +23,16 @@ ENTRY (_ZGVeN16v_cosf) .type _ZGVeN16v_cosf, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1 + jne 1f call __init_cpu_features 1: leaq _ZGVeN16v_cosf_skx(%rip), %rax testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) - jnz 3 -2: leaq _ZGVeN16v_cosf_knl(%rip), %rax + jnz 2f + leaq _ZGVeN16v_cosf_knl(%rip), %rax testl $bit_AVX512F_Usable, 
__cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) - jnz 3 + jnz 2f leaq _ZGVeN16v_cosf_avx2_wrapper(%rip), %rax -3: ret +2: ret END (_ZGVeN16v_cosf) #define _ZGVeN16v_cosf _ZGVeN16v_cosf_avx2_wrapper diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S index 3b3489d05a..62858eb39e 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S @@ -23,16 +23,16 @@ ENTRY (_ZGVeN16v_expf) .type _ZGVeN16v_expf, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1 + jne 1f call __init_cpu_features 1: leaq _ZGVeN16v_expf_skx(%rip), %rax testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) - jnz 3 -2: leaq _ZGVeN16v_expf_knl(%rip), %rax + jnz 2f + leaq _ZGVeN16v_expf_knl(%rip), %rax testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) - jnz 3 + jnz 2f leaq _ZGVeN16v_expf_avx2_wrapper(%rip), %rax -3: ret +2: ret END (_ZGVeN16v_expf) #define _ZGVeN16v_expf _ZGVeN16v_expf_avx2_wrapper diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S index cb807e0757..ec69055351 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S @@ -46,6 +46,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf The table lookup is skipped if k = 0. For low accuracy approximation, exp(r) ~ 1 or 1+r. 
*/ + pushq %rbp cfi_adjust_cfa_offset (8) cfi_rel_offset (%rbp, 0) movq %rsp, %rbp diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S index 8756750c86..68c57e4386 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S @@ -23,16 +23,16 @@ ENTRY (_ZGVeN16v_logf) .type _ZGVeN16v_logf, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1 + jne 1f call __init_cpu_features 1: leaq _ZGVeN16v_logf_skx(%rip), %rax testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) - jnz 3 -2: leaq _ZGVeN16v_logf_knl(%rip), %rax + jnz 2f + leaq _ZGVeN16v_logf_knl(%rip), %rax testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) - jnz 3 + jnz 2f leaq _ZGVeN16v_logf_avx2_wrapper(%rip), %rax -3: ret +2: ret END (_ZGVeN16v_logf) #define _ZGVeN16v_logf _ZGVeN16v_logf_avx2_wrapper diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S index a4ba4fbc04..3aa9f952ce 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S @@ -23,16 +23,16 @@ ENTRY (_ZGVeN16vv_powf) .type _ZGVeN16vv_powf, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1 + jne 1f call __init_cpu_features 1: leaq _ZGVeN16vv_powf_skx(%rip), %rax testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) - jnz 3 -2: leaq _ZGVeN16vv_powf_knl(%rip), %rax + jnz 2f + leaq _ZGVeN16vv_powf_knl(%rip), %rax testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) - jnz 3 + jnz 2f leaq _ZGVeN16vv_powf_avx2_wrapper(%rip), %rax -3: ret +2: ret END (_ZGVeN16vv_powf) #define _ZGVeN16vv_powf _ZGVeN16vv_powf_avx2_wrapper diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S index 
0a1753eab7..bdcabab6e2 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S @@ -23,16 +23,16 @@ ENTRY (_ZGVeN16vvv_sincosf) .type _ZGVeN16vvv_sincosf, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1 + jne 1f call __init_cpu_features 1: leaq _ZGVeN16vvv_sincosf_skx(%rip), %rax testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) - jnz 3 -2: leaq _ZGVeN16vvv_sincosf_knl(%rip), %rax + jnz 2f + leaq _ZGVeN16vvv_sincosf_knl(%rip), %rax testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) - jnz 3 + jnz 2f leaq _ZGVeN16vvv_sincosf_avx2_wrapper(%rip), %rax -3: ret +2: ret END (_ZGVeN16vvv_sincosf) #define _ZGVeN16vvv_sincosf _ZGVeN16vvv_sincosf_avx2_wrapper diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S index 7ed637b8e6..3ec78a0b5e 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S @@ -23,16 +23,16 @@ ENTRY (_ZGVeN16v_sinf) .type _ZGVeN16v_sinf, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1 + jne 1f call __init_cpu_features 1: leaq _ZGVeN16v_sinf_skx(%rip), %rax testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) - jnz 3 -2: leaq _ZGVeN16v_sinf_knl(%rip), %rax + jnz 2f + leaq _ZGVeN16v_sinf_knl(%rip), %rax testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) - jnz 3 + jnz 2f leaq _ZGVeN16v_sinf_avx2_wrapper(%rip), %rax -3: ret +2: ret END (_ZGVeN16v_sinf) #define _ZGVeN16v_sinf _ZGVeN16v_sinf_avx2_wrapper |