about summary refs log tree commit diff
path: root/sysdeps/x86_64/fpu
diff options
context:
space:
mode:
authorSunil K Pandey <skpgkp2@gmail.com>2022-03-07 10:47:10 -0800
committerSunil K Pandey <skpgkp2@gmail.com>2022-03-07 21:14:10 -0800
commit5e837461dcbbe25153db3b8437ac4c0030292b51 (patch)
tree61854571d05b6cc95708bb4cd122448a5cbeafda /sysdeps/x86_64/fpu
parent994266f5019560f26e8d07be7fdf8621903339a1 (diff)
downloadglibc-5e837461dcbbe25153db3b8437ac4c0030292b51.tar.gz
glibc-5e837461dcbbe25153db3b8437ac4c0030292b51.tar.xz
glibc-5e837461dcbbe25153db3b8437ac4c0030292b51.zip
x86_64: Fix svml_s_cbrtf16_core_avx512.S code formatting
This commit contains following formatting changes

1. Instructions preceded by a tab.
2. Instruction less than 8 characters in length have a tab
   between it and the first operand.
3. Instruction greater than 7 characters in length have a
   space between it and the first operand.
4. Tabs after `#define`d names and their value.
5. 8 spaces at the beginning of a line replaced by a tab.
6. Indent comments with code.
7. Remove redundant .text section.
8. 1 space between line content and line comment.
9. Space after all commas.

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Diffstat (limited to 'sysdeps/x86_64/fpu')
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cbrtf16_core_avx512.S377
1 file changed, 188 insertions, 189 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cbrtf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cbrtf16_core_avx512.S
index 9cf7918019..ce10cf177b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cbrtf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cbrtf16_core_avx512.S
@@ -31,205 +31,204 @@
 
 /* Offsets for data table __svml_scbrt_data_internal_avx512
  */
-#define etbl_H                        	0
-#define etbl_L                        	64
-#define cbrt_tbl_H                    	128
-#define BiasL                         	256
-#define SZero                         	320
-#define OneThird                      	384
-#define Bias3                         	448
-#define Three                         	512
-#define One                           	576
-#define poly_coeff3                   	640
-#define poly_coeff2                   	704
-#define poly_coeff1                   	768
+#define etbl_H				0
+#define etbl_L				64
+#define cbrt_tbl_H			128
+#define BiasL				256
+#define SZero				320
+#define OneThird			384
+#define Bias3				448
+#define Three				512
+#define One				576
+#define poly_coeff3			640
+#define poly_coeff2			704
+#define poly_coeff1			768
 
 #include <sysdep.h>
 
-        .text
-	.section .text.exex512,"ax",@progbits
+	.section .text.exex512, "ax", @progbits
 ENTRY(_ZGVeN16v_cbrtf_skx)
-        vgetmantps $0, {sae}, %zmm0, %zmm8
-
-/* GetExp(x) */
-        vgetexpps {sae}, %zmm0, %zmm1
-        vmovups   BiasL+__svml_scbrt_data_internal_avx512(%rip), %zmm2
-
-/* exponent/3 */
-        vmovups   OneThird+__svml_scbrt_data_internal_avx512(%rip), %zmm3
-        vmovups   Bias3+__svml_scbrt_data_internal_avx512(%rip), %zmm4
-        vmovups   One+__svml_scbrt_data_internal_avx512(%rip), %zmm15
-
-/* exponent%3 (to be used as index) */
-        vmovups   Three+__svml_scbrt_data_internal_avx512(%rip), %zmm5
-
-/* polynomial */
-        vmovups   poly_coeff3+__svml_scbrt_data_internal_avx512(%rip), %zmm11
-        vmovups   poly_coeff1+__svml_scbrt_data_internal_avx512(%rip), %zmm14
-
-/* Table lookup */
-        vmovups   cbrt_tbl_H+__svml_scbrt_data_internal_avx512(%rip), %zmm12
-
-/* DblRcp ~ 1/Mantissa */
-        vrcp14ps  %zmm8, %zmm7
-        vaddps    {rn-sae}, %zmm2, %zmm1, %zmm6
-        vandps    SZero+__svml_scbrt_data_internal_avx512(%rip), %zmm0, %zmm0
-
-/* round DblRcp to 3 fractional bits (RN mode, no Precision exception) */
-        vrndscaleps $88, {sae}, %zmm7, %zmm9
-        vfmsub231ps {rn-sae}, %zmm6, %zmm3, %zmm4
-        vmovups   poly_coeff2+__svml_scbrt_data_internal_avx512(%rip), %zmm7
-
-/* Reduced argument: R = DblRcp*Mantissa - 1 */
-        vfmsub231ps {rn-sae}, %zmm9, %zmm8, %zmm15
-        vrndscaleps $9, {sae}, %zmm4, %zmm13
-
-/* Prepare table index */
-        vpsrld    $19, %zmm9, %zmm10
-        vfmadd231ps {rn-sae}, %zmm15, %zmm11, %zmm7
-        vfnmadd231ps {rn-sae}, %zmm13, %zmm5, %zmm6
-        vpermt2ps cbrt_tbl_H+64+__svml_scbrt_data_internal_avx512(%rip), %zmm10, %zmm12
-        vfmadd213ps {rn-sae}, %zmm14, %zmm15, %zmm7
-        vscalefps {rn-sae}, %zmm13, %zmm12, %zmm2
-
-/* Table lookup: 2^(exponent%3) */
-        vpermps   __svml_scbrt_data_internal_avx512(%rip), %zmm6, %zmm1
-        vpermps   etbl_L+__svml_scbrt_data_internal_avx512(%rip), %zmm6, %zmm6
-
-/* Sh*R */
-        vmulps    {rn-sae}, %zmm15, %zmm1, %zmm14
-
-/* Sl + (Sh*R)*Poly */
-        vfmadd213ps {rn-sae}, %zmm6, %zmm7, %zmm14
-
-/*
- * branch-free
- * scaled_Th*(Sh+Sl+Sh*R*Poly)
- */
-        vaddps    {rn-sae}, %zmm1, %zmm14, %zmm15
-        vmulps    {rn-sae}, %zmm2, %zmm15, %zmm3
-        vorps     %zmm0, %zmm3, %zmm0
-        ret
+	vgetmantps $0, {sae}, %zmm0, %zmm8
+
+	/* GetExp(x) */
+	vgetexpps {sae}, %zmm0, %zmm1
+	vmovups	BiasL+__svml_scbrt_data_internal_avx512(%rip), %zmm2
+
+	/* exponent/3 */
+	vmovups	OneThird+__svml_scbrt_data_internal_avx512(%rip), %zmm3
+	vmovups	Bias3+__svml_scbrt_data_internal_avx512(%rip), %zmm4
+	vmovups	One+__svml_scbrt_data_internal_avx512(%rip), %zmm15
+
+	/* exponent%3 (to be used as index) */
+	vmovups	Three+__svml_scbrt_data_internal_avx512(%rip), %zmm5
+
+	/* polynomial */
+	vmovups	poly_coeff3+__svml_scbrt_data_internal_avx512(%rip), %zmm11
+	vmovups	poly_coeff1+__svml_scbrt_data_internal_avx512(%rip), %zmm14
+
+	/* Table lookup */
+	vmovups	cbrt_tbl_H+__svml_scbrt_data_internal_avx512(%rip), %zmm12
+
+	/* DblRcp ~ 1/Mantissa */
+	vrcp14ps %zmm8, %zmm7
+	vaddps	{rn-sae}, %zmm2, %zmm1, %zmm6
+	vandps	SZero+__svml_scbrt_data_internal_avx512(%rip), %zmm0, %zmm0
+
+	/* round DblRcp to 3 fractional bits (RN mode, no Precision exception) */
+	vrndscaleps $88, {sae}, %zmm7, %zmm9
+	vfmsub231ps {rn-sae}, %zmm6, %zmm3, %zmm4
+	vmovups	poly_coeff2+__svml_scbrt_data_internal_avx512(%rip), %zmm7
+
+	/* Reduced argument: R = DblRcp*Mantissa - 1 */
+	vfmsub231ps {rn-sae}, %zmm9, %zmm8, %zmm15
+	vrndscaleps $9, {sae}, %zmm4, %zmm13
+
+	/* Prepare table index */
+	vpsrld	$19, %zmm9, %zmm10
+	vfmadd231ps {rn-sae}, %zmm15, %zmm11, %zmm7
+	vfnmadd231ps {rn-sae}, %zmm13, %zmm5, %zmm6
+	vpermt2ps cbrt_tbl_H+64+__svml_scbrt_data_internal_avx512(%rip), %zmm10, %zmm12
+	vfmadd213ps {rn-sae}, %zmm14, %zmm15, %zmm7
+	vscalefps {rn-sae}, %zmm13, %zmm12, %zmm2
+
+	/* Table lookup: 2^(exponent%3) */
+	vpermps	__svml_scbrt_data_internal_avx512(%rip), %zmm6, %zmm1
+	vpermps	etbl_L+__svml_scbrt_data_internal_avx512(%rip), %zmm6, %zmm6
+
+	/* Sh*R */
+	vmulps	{rn-sae}, %zmm15, %zmm1, %zmm14
+
+	/* Sl + (Sh*R)*Poly */
+	vfmadd213ps {rn-sae}, %zmm6, %zmm7, %zmm14
+
+	/*
+	 * branch-free
+	 * scaled_Th*(Sh+Sl+Sh*R*Poly)
+	 */
+	vaddps	{rn-sae}, %zmm1, %zmm14, %zmm15
+	vmulps	{rn-sae}, %zmm2, %zmm15, %zmm3
+	vorps	%zmm0, %zmm3, %zmm0
+	ret
 
 END(_ZGVeN16v_cbrtf_skx)
 
-        .section .rodata, "a"
-        .align 64
+	.section .rodata, "a"
+	.align	64
 
 #ifdef __svml_scbrt_data_internal_avx512_typedef
 typedef unsigned int VUINT32;
 typedef struct {
-        __declspec(align(64)) VUINT32 etbl_H[16][1];
-        __declspec(align(64)) VUINT32 etbl_L[16][1];
-        __declspec(align(64)) VUINT32 cbrt_tbl_H[32][1];
-        __declspec(align(64)) VUINT32 BiasL[16][1];
-        __declspec(align(64)) VUINT32 SZero[16][1];
-        __declspec(align(64)) VUINT32 OneThird[16][1];
-        __declspec(align(64)) VUINT32 Bias3[16][1];
-        __declspec(align(64)) VUINT32 Three[16][1];
-        __declspec(align(64)) VUINT32 One[16][1];
-        __declspec(align(64)) VUINT32 poly_coeff3[16][1];
-        __declspec(align(64)) VUINT32 poly_coeff2[16][1];
-        __declspec(align(64)) VUINT32 poly_coeff1[16][1];
-    } __svml_scbrt_data_internal_avx512;
+	__declspec(align(64)) VUINT32 etbl_H[16][1];
+	__declspec(align(64)) VUINT32 etbl_L[16][1];
+	__declspec(align(64)) VUINT32 cbrt_tbl_H[32][1];
+	__declspec(align(64)) VUINT32 BiasL[16][1];
+	__declspec(align(64)) VUINT32 SZero[16][1];
+	__declspec(align(64)) VUINT32 OneThird[16][1];
+	__declspec(align(64)) VUINT32 Bias3[16][1];
+	__declspec(align(64)) VUINT32 Three[16][1];
+	__declspec(align(64)) VUINT32 One[16][1];
+	__declspec(align(64)) VUINT32 poly_coeff3[16][1];
+	__declspec(align(64)) VUINT32 poly_coeff2[16][1];
+	__declspec(align(64)) VUINT32 poly_coeff1[16][1];
+} __svml_scbrt_data_internal_avx512;
 #endif
 __svml_scbrt_data_internal_avx512:
-        /*== etbl_H ==*/
-        .long 0x3f800000
-        .long 0x3fa14518
-        .long 0x3fcb2ff5
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        /*== etbl_L ==*/
-        .align 64
-        .long 0x00000000
-        .long 0xb2ce51af
-        .long 0x32a7adc8
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        /*== cbrt_tbl_H ==*/
-        .align 64
-        .long 0x3fa14518
-        .long 0x3f9e0b2b
-        .long 0x3f9b0f9b
-        .long 0x3f984a9a
-        .long 0x3f95b5af
-        .long 0x3f934b6c
-        .long 0x3f910737
-        .long 0x3f8ee526
-        .long 0x3f8ce1da
-        .long 0x3f8afa6a
-        .long 0x3f892c4e
-        .long 0x3f87754e
-        .long 0x3f85d377
-        .long 0x3f844510
-        .long 0x3f82c892
-        .long 0x3f815c9f
-        .long 0x3f800000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        .long 0x00000000
-        /*== BiasL ==*/
-        .align 64
-        .long 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000
-        /*== Zero ==*/
-        .align 64
-        .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000
-        /*== OneThird ==*/
-        .align 64
-        .long 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab
-        /*== Bias3 ==*/
-        .align 64
-        .long 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000
-        /*== Three ==*/
-        .align 64
-        .long 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000
-        /*==One ==*/
-        .align 64
-        .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
-        /*== poly_coeff3 ==*/
-        .align 64
-        .long 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c
-        /*== poly_coeff2 ==*/
-        .align 64
-        .long 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363
-        /*== poly_coeff1 ==*/
-        .align 64
-        .long 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa
-        .align 64
-        .type	__svml_scbrt_data_internal_avx512,@object
-        .size	__svml_scbrt_data_internal_avx512,.-__svml_scbrt_data_internal_avx512
+	/* etbl_H */
+	.long	0x3f800000
+	.long	0x3fa14518
+	.long	0x3fcb2ff5
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	/* etbl_L */
+	.align	64
+	.long	0x00000000
+	.long	0xb2ce51af
+	.long	0x32a7adc8
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	/* cbrt_tbl_H */
+	.align	64
+	.long	0x3fa14518
+	.long	0x3f9e0b2b
+	.long	0x3f9b0f9b
+	.long	0x3f984a9a
+	.long	0x3f95b5af
+	.long	0x3f934b6c
+	.long	0x3f910737
+	.long	0x3f8ee526
+	.long	0x3f8ce1da
+	.long	0x3f8afa6a
+	.long	0x3f892c4e
+	.long	0x3f87754e
+	.long	0x3f85d377
+	.long	0x3f844510
+	.long	0x3f82c892
+	.long	0x3f815c9f
+	.long	0x3f800000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	/* BiasL */
+	.align	64
+	.long	0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000
+	/* Zero */
+	.align	64
+	.long	0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000
+	/* OneThird */
+	.align	64
+	.long	0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab
+	/* Bias3 */
+	.align	64
+	.long	0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000
+	/* Three */
+	.align	64
+	.long	0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000
+	/* One */
+	.align	64
+	.long	0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
+	/* poly_coeff3 */
+	.align	64
+	.long	0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c
+	/* poly_coeff2 */
+	.align	64
+	.long	0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363
+	/* poly_coeff1 */
+	.align	64
+	.long	0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa
+	.align	64
+	.type	__svml_scbrt_data_internal_avx512, @object
+	.size	__svml_scbrt_data_internal_avx512, .-__svml_scbrt_data_internal_avx512