/* Source path: sysdeps/x86_64/fpu/multiarch/svml_s_tanf16_core_avx512.S
   (blob ae95fbae912d31b2998af53cfd8c3f90e4dde2ef)  */
/* Function tanf vectorized with AVX-512.
   Copyright (C) 2021-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   https://www.gnu.org/licenses/.  */

/*
 * ALGORITHM DESCRIPTION:
 *
 *      ( optimized for throughput, with small table lookup, works when HW FMA is available )
 *
 *       Implementation reduces argument x to |R|<pi/64
 *       32-entry tables used to store high and low parts of tan(x0)
 *       Argument x = N*pi + x0 + (R);   x0 = k*pi/32, with k in {0, 1, ..., 31}
 *       (very large arguments reduction resolved in _vsreduction_core.i)
 *       Compute result as (tan(x0) + tan(R))/(1-tan(x0)*tan(R))
 *       _HA_ version keeps extra precision for numerator, denominator, and during
 *       final NR-iteration computing quotient.
 *
 *
 */

/* Byte offsets of the fields inside the data table __svml_stan_data_internal.
   Each field is one 64-byte vector (16 x 32-bit lanes), except
   Th_tbl_uisa, which spans 128 bytes (32 entries).
 */
#define _sInvPI_uisa			0
#define _sPI1_uisa			64
#define _sPI2_uisa			128
#define _sPI3_uisa			192
#define Th_tbl_uisa			256
#define _sPC3_uisa			384
#define _sPC5_uisa			448
#define _sRangeReductionVal_uisa	512
#define _sAbsMask			576
#define _sRangeVal			640
#define _sRShifter			704
#define _sOne				768
#define _sRangeReductionVal		832
#define _sPI1				896
#define _sPI2				960
#define _sPI3				1024
#include <sysdep.h>

	.section .text.evex512, "ax", @progbits
/* _ZGVeN16v_tanf_skx: tanf on 16 packed single-precision lanes.
 *
 * In:    zmm0 = 16 float arguments (copied to zmm11 below).
 * Out:   zmm0 = 16 float results.
 * Stack: rbp is used as frame pointer; rsp is aligned down to 64 bytes
 *        and 192 bytes of scratch are reserved below it.
 *
 * This first part runs the fast argument reduction on every lane and
 * builds k6, the mask of lanes whose |x| is not <= the
 * _sRangeReductionVal_uisa threshold (NaN lanes included).  When k6 is
 * non-zero, control goes to L(AUX_BRANCH) before the main evaluation.
 */
ENTRY(_ZGVeN16v_tanf_skx)
	pushq	%rbp
	cfi_def_cfa_offset(16)
	movq	%rsp, %rbp
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)
	/* Align the frame to 64 bytes and reserve 192 bytes of scratch.  */
	andq	$-64, %rsp
	subq	$192, %rsp
	/* edx = bitmask of lanes to recompute with scalar tanf; it stays 0
	   unless the auxiliary branch sets it.  */
	xorl	%edx, %edx

	/* Large values check */
	vmovups	_sRangeReductionVal_uisa+__svml_stan_data_internal(%rip), %zmm10

	/*
	 *
	 * Main path
	 *
	 * start arg. reduction
	 */
	vmovups	_sRShifter+__svml_stan_data_internal(%rip), %zmm1
	vmovups	_sPI1_uisa+__svml_stan_data_internal(%rip), %zmm4
	vmovups	_sPI2_uisa+__svml_stan_data_internal(%rip), %zmm2
	vmovups	_sPI3_uisa+__svml_stan_data_internal(%rip), %zmm3
	/* zmm11 = x (original input), zmm0 = |x|.  */
	vmovaps	%zmm0, %zmm11
	vandps	_sAbsMask+__svml_stan_data_internal(%rip), %zmm11, %zmm0
	/* k6 = lanes where !(|x| <= threshold); predicate 22 is NLE_UQ, so
	   unordered (NaN) lanes are set as well.  */
	vcmpps	$22, {sae}, %zmm10, %zmm0, %k6
	/* zmm10 = _sInvPI_uisa (table offset 0).  */
	vmovups	__svml_stan_data_internal(%rip), %zmm10

	/*
	 *
	 * End of main path
	 */

	/* Set ZF from k6 here; the vector instructions that follow do not
	   write RFLAGS, so the jne below still tests this result.  */
	kortestw %k6, %k6
	/* zmm10 = x * _sInvPI_uisa + _sRShifter.  */
	vfmadd213ps {rn-sae}, %zmm1, %zmm11, %zmm10
	/* zmm5 = zmm10 - _sRShifter.  */
	vsubps	{rn-sae}, %zmm1, %zmm10, %zmm5
	/* Reduced argument, built in three steps:
	   zmm4 = x - zmm5 * _sPI1_uisa
	   zmm4 = zmm4 - zmm5 * _sPI2_uisa
	   zmm5 = zmm4 - zmm5 * _sPI3_uisa.  */
	vfnmadd213ps {rn-sae}, %zmm11, %zmm5, %zmm4
	vfnmadd231ps {rn-sae}, %zmm5, %zmm2, %zmm4
	vfnmadd213ps {rn-sae}, %zmm4, %zmm3, %zmm5

	/* Go to auxiliary branch */
	jne	L(AUX_BRANCH)
	# LOE rbx r12 r13 r14 r15 edx zmm0 zmm5 zmm10 zmm11 k6

	/* Return from auxiliary branch
	 * for out of main path inputs
	 *
	 * Main evaluation, shared by both paths.  Live on entry:
	 * zmm5 = reduced argument R, zmm10 = per-lane index vector for the
	 * Th table, zmm11 = original x, edx = mask of lanes for the scalar
	 * fallback.  The result vector is produced in zmm0.
	 */

L(AUX_BRANCH_RETURN):
	/* Table lookup */
	vmovups	Th_tbl_uisa+__svml_stan_data_internal(%rip), %zmm3
	vmovups	_sPC3_uisa+__svml_stan_data_internal(%rip), %zmm0
	/* zmm1 = R^2.  */
	vmulps	{rn-sae}, %zmm5, %zmm5, %zmm1
	/* zmm3 = Th: zmm10 selects one of the 32 entries held in zmm3
	   (entries 0-15) and the memory operand (entries 16-31).  */
	vpermt2ps Th_tbl_uisa+64+__svml_stan_data_internal(%rip), %zmm10, %zmm3
	vmovups	_sPC5_uisa+__svml_stan_data_internal(%rip), %zmm10
	/* zmm4 = P = R + R^3 * (PC3 + PC5 * R^2), computed as
	   zmm0 = PC3 + PC5 * R^2; zmm4 = R * zmm0; zmm4 = R^2 * zmm4 + R.  */
	vfmadd231ps {rn-sae}, %zmm1, %zmm10, %zmm0
	vmulps	{rn-sae}, %zmm5, %zmm0, %zmm4
	vfmadd213ps {rn-sae}, %zmm5, %zmm1, %zmm4

	/*
	 * Compute Denominator:
	 * sDenominator - sDlow ~= 1-(sTh+sTl)*(sP+sPlow)
	 */
	vmovups	_sOne+__svml_stan_data_internal(%rip), %zmm5
	/* zmm7 = Th * P.  */
	vmulps	{rn-sae}, %zmm4, %zmm3, %zmm7

	/*
	 * Compute Numerator:
	 * sNumerator + sNlow ~= sTh+sTl+sP+sPlow
	 */
	/* zmm8 = P + Th (numerator), zmm9 = 1 - Th * P (denominator),
	   zmm2 = (P + Th) - Th.  */
	vaddps	{rn-sae}, %zmm3, %zmm4, %zmm8
	vsubps	{rn-sae}, %zmm7, %zmm5, %zmm9
	vsubps	{rn-sae}, %zmm3, %zmm8, %zmm2

	/*
	 * Now computes (sNumerator + sNlow)/(sDenominator - sDlow)
	 * Choose NR iteration instead of hardware division
	 */
	/* zmm14 = approximate reciprocal of the denominator.  */
	vrcp14ps %zmm9, %zmm14
	/* zmm6 = denominator - 1; zmm13 = P - zmm2 (rounding error of the
	   numerator sum); zmm15 = numerator * zmm14 (first quotient
	   estimate); zmm12 = zmm6 + Th * P (rounding error of the
	   denominator).  */
	vsubps	{rn-sae}, %zmm5, %zmm9, %zmm6
	vsubps	{rn-sae}, %zmm2, %zmm4, %zmm13
	vmulps	{rn-sae}, %zmm8, %zmm14, %zmm15
	vaddps	{rn-sae}, %zmm7, %zmm6, %zmm12

	/* One NR iteration to refine sQuotient */
	/* zmm9  = quotient * denominator - numerator
	   zmm12 = zmm9 - quotient * zmm12
	   zmm0  = zmm12 - zmm13
	   zmm0  = quotient - zmm14 * zmm0   (refined quotient).  */
	vfmsub213ps {rn-sae}, %zmm8, %zmm15, %zmm9
	vfnmadd213ps {rn-sae}, %zmm9, %zmm15, %zmm12
	vsubps	{rn-sae}, %zmm13, %zmm12, %zmm0
	vfnmadd213ps {rn-sae}, %zmm15, %zmm14, %zmm0
	/* Any lane flagged for the scalar fallback?  */
	testl	%edx, %edx

	/* Go to special inputs processing branch */
	jne	L(SPECIAL_VALUES_BRANCH)
	# LOE rbx r12 r13 r14 r15 edx zmm0 zmm11

	/* Restore registers
	 * and exit the function
	 *
	 * zmm0 holds the result vector.  rsp is recovered from rbp, which
	 * undoes both the 64-byte alignment and the 192-byte reservation.
	 */

L(EXIT):
	movq	%rbp, %rsp
	popq	%rbp
	/* After the pop the CFA is rsp + 8 and rbp holds the caller's value.  */
	cfi_def_cfa(7, 8)
	cfi_restore(6)
	ret
	/* Re-establish the in-frame CFI (CFA = rbp + 16, rbp saved at CFA - 16)
	   for the code laid out after this ret.  */
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)

	/* Branch to process
	 * special inputs
	 *
	 * Entered with edx != 0.  Spills the input vector to 64(%rsp) and
	 * the vector result to 128(%rsp), saves r12-r14 on the stack and
	 * sets up r12d = lane counter (0) and r13d = lane mask (from edx)
	 * for the loop that follows.
	 */

L(SPECIAL_VALUES_BRANCH):
	vmovups	%zmm11, 64(%rsp)
	vmovups	%zmm0, 128(%rsp)
	# LOE rbx r12 r13 r14 r15 edx zmm0

	xorl	%eax, %eax
	# LOE rbx r12 r13 r14 r15 eax edx

	/* Clear the upper vector state before the scalar tanf calls.  */
	vzeroupper
	movq	%r12, 16(%rsp)
	/*  DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
	/* r12d = lane index, starting at 0.  */
	movl	%eax, %r12d
	movq	%r13, 8(%rsp)
	/*  DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
	/* r13d = mask of lanes that need the scalar call.  */
	movl	%edx, %r13d
	movq	%r14, (%rsp)
	/*  DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r15 r12d r13d

	/* Range mask
	 * bits check
	 *
	 * Tests bit r12d (current lane) of the mask in r13d; a set bit
	 * sends the lane to the scalar call, a clear bit falls through to
	 * the loop increment.
	 */

L(RANGEMASK_CHECK):
	/* CF = bit number r12d of r13d.  */
	btl	%r12d, %r13d

	/* Call scalar math function */
	jc	L(SCALAR_MATH_CALL)
	# LOE rbx r15 r12d r13d

	/* Special inputs
	 * processing loop
	 *
	 * Advances the lane counter r12d and loops until all 16 lanes have
	 * been checked, then restores r12-r14, reloads the (patched) result
	 * vector from 128(%rsp) into zmm0 and leaves through L(EXIT).
	 */

L(SPECIAL_VALUES_LOOP):
	incl	%r12d
	cmpl	$16, %r12d

	/* Check bits in range mask */
	jl	L(RANGEMASK_CHECK)
	# LOE rbx r15 r12d r13d

	movq	16(%rsp), %r12
	cfi_restore(12)
	movq	8(%rsp), %r13
	cfi_restore(13)
	movq	(%rsp), %r14
	cfi_restore(14)
	vmovups	128(%rsp), %zmm0

	/* Go to exit */
	jmp	L(EXIT)
	/* The expressions below re-describe r12-r14 as saved on the stack
	   for the code laid out after this jmp.  */
	/*  DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
	/*  DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
	/*  DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r12 r13 r14 r15 zmm0

	/* Scalar math function call
	 * to process special input
	 *
	 * Recomputes lane r12d with scalar tanf: the input is read from the
	 * spilled argument vector at 64(%rsp) and the result overwrites the
	 * matching element of the spilled result vector at 128(%rsp).
	 */

L(SCALAR_MATH_CALL):
	/* r14 = zero-extended lane index, used to address the spill slots
	   before and after the call.  */
	movl	%r12d, %r14d
	vmovss	64(%rsp, %r14, 4), %xmm0
	call	tanf@PLT
	# LOE rbx r14 r15 r12d r13d xmm0

	vmovss	%xmm0, 128(%rsp, %r14, 4)

	/* Process special inputs in loop */
	jmp	L(SPECIAL_VALUES_LOOP)
	cfi_restore(12)
	cfi_restore(13)
	cfi_restore(14)
	# LOE rbx r15 r12d r13d

	/* Auxiliary branch
	 * for out of main path inputs
	 *
	 * Reached from the entry code when k6 is non-zero.  Live on entry:
	 * zmm0 = |x|, zmm5 = main-path reduced argument, zmm10 = main-path
	 * index vector, zmm11 = x, k6 = mask of large lanes.  On exit zmm5
	 * and zmm10 carry the large-path values in the k6 lanes (other
	 * lanes keep their main-path values) and edx holds the mask of
	 * lanes whose exponent field is all ones.  Control then returns to
	 * L(AUX_BRANCH_RETURN).
	 */

L(AUX_BRANCH):
	vmovups	_sRangeVal+__svml_stan_data_internal(%rip), %zmm6

	/*
	 * Get the (2^a / 2pi) mod 1 values from the table.
	 * Because there is no I-type gather, we need a trivial cast
	 */
	lea	__svml_stan_reduction_data_internal(%rip), %rax
	/* Spill the main-path reduced argument; zmm5 is reused below and
	   reloaded just before the final blend.  */
	vmovups	%zmm5, (%rsp)
	/* k0 = lanes where (|x| & _sRangeVal) == _sRangeVal, i.e. the
	   exponent field is all ones (Inf or NaN).  */
	vandps	%zmm0, %zmm6, %zmm14
	vcmpps	$0, {sae}, %zmm6, %zmm14, %k0

	/*
	 * Break the P_xxx and m into 16-bit chunks ready for
	 * the long multiplication via 16x16->32 multiplications
	 */
	/* zmm6 = 0x0000ffff in every lane (low 16-bit mask).  */
	vmovups	.FLT_15(%rip), %zmm6
	/* k1..k3 = all ones; one write-mask per gather, because a gather
	   clears its mask as elements complete.  */
	kxnorw	%k0, %k0, %k1
	kxnorw	%k0, %k0, %k2
	kxnorw	%k0, %k0, %k3
	/* edx = Inf/NaN lane mask, tested after the main evaluation.  */
	kmovw	%k0, %edx
	/* zmm7 = biased exponent of x; zmm4 = 12 * exponent, the byte
	   offset of the 3-dword row in the reduction table.  */
	vpandd	.FLT_12(%rip), %zmm11, %zmm5
	vpsrld	$23, %zmm5, %zmm7
	vpslld	$1, %zmm7, %zmm8
	vpaddd	%zmm7, %zmm8, %zmm9
	vpslld	$2, %zmm9, %zmm4
	vpxord	%zmm3, %zmm3, %zmm3
	vpxord	%zmm15, %zmm15, %zmm15
	vpxord	%zmm2, %zmm2, %zmm2
	/* zmm3, zmm15, zmm2 = first, second and third dword of each row
	   (P_hi, P_med, P_lo in the table's column header).  */
	vgatherdps (%rax, %zmm4), %zmm3{%k1}
	vgatherdps 4(%rax, %zmm4), %zmm15{%k2}
	vgatherdps 8(%rax, %zmm4), %zmm2{%k3}
	/* Upper 16-bit halves of the first and third dword.  */
	vpsrld	$16, %zmm3, %zmm5
	vpsrld	$16, %zmm2, %zmm13

	/*
	 * Also get the significand as an integer
	 * NB: adding in the integer bit is wrong for denorms!
	 * To make this work for denorms we should do something slightly different
	 */
	/* zmm1 = (x & 0x007fffff) + 0x00800000.  */
	vpandd	.FLT_13(%rip), %zmm11, %zmm0
	vpaddd	.FLT_14(%rip), %zmm0, %zmm1
	/* Remaining 16-bit halves: zmm0 = second dword >> 16,
	   zmm8 = significand >> 16, zmm9/zmm12/zmm7 = low halves of the
	   three table dwords, zmm14 = low half of the significand.  */
	vpsrld	$16, %zmm15, %zmm0
	vpsrld	$16, %zmm1, %zmm8
	vpandd	%zmm6, %zmm3, %zmm9
	vpandd	%zmm6, %zmm15, %zmm12
	vpandd	%zmm6, %zmm2, %zmm7
	vpandd	%zmm6, %zmm1, %zmm14

	/* Now do the big multiplication and carry propagation */
	/* Partial products with the significand's upper half (zmm8) ...  */
	vpmulld	%zmm9, %zmm8, %zmm4
	vpmulld	%zmm0, %zmm8, %zmm3
	vpmulld	%zmm12, %zmm8, %zmm2
	vpmulld	%zmm13, %zmm8, %zmm1
	vpmulld	%zmm7, %zmm8, %zmm8
	/* ... and with its lower half (zmm14).  */
	vpmulld	%zmm5, %zmm14, %zmm7
	vpmulld	%zmm9, %zmm14, %zmm5
	vpmulld	%zmm0, %zmm14, %zmm9
	vpmulld	%zmm12, %zmm14, %zmm0
	vpmulld	%zmm13, %zmm14, %zmm12
	/* Sum the 16-bit columns; each ">> 16" carries into the next column.  */
	vpsrld	$16, %zmm12, %zmm14
	vpsrld	$16, %zmm0, %zmm13
	vpsrld	$16, %zmm9, %zmm15
	vpsrld	$16, %zmm5, %zmm12
	vpsrld	$16, %zmm8, %zmm8
	vpaddd	%zmm14, %zmm1, %zmm1
	vpaddd	%zmm13, %zmm2, %zmm2
	vpaddd	%zmm15, %zmm3, %zmm15
	vpaddd	%zmm12, %zmm4, %zmm3
	vpandd	%zmm6, %zmm0, %zmm13
	vpaddd	%zmm1, %zmm13, %zmm4
	vpaddd	%zmm4, %zmm8, %zmm14
	vpsrld	$16, %zmm14, %zmm0
	vpandd	%zmm6, %zmm9, %zmm9
	vpaddd	%zmm2, %zmm9, %zmm1
	vpaddd	%zmm1, %zmm0, %zmm8

	/*
	 * Now round at the 2^-8 bit position for reduction mod pi/2^7
	 * instead of the original 2pi (but still with the same 2pi scaling).
	 * Use a shifter of 2^15 + 2^14.
	 * The N we get is our final version; it has an offset of
	 * 2^8 because of the implicit integer bit, and anyway for negative
	 * starting value it's a 2s complement thing. But we need to mask
	 * off the exponent part anyway so it's fine.
	 */
	/* zmm1 = the shifter constant (0x47400000).  */
	vmovups	.FLT_18(%rip), %zmm1
	vpandd	%zmm6, %zmm7, %zmm7
	vpaddd	%zmm3, %zmm7, %zmm13
	vpsrld	$16, %zmm8, %zmm3
	vpandd	%zmm6, %zmm5, %zmm5
	vpaddd	%zmm15, %zmm5, %zmm2
	vpaddd	%zmm2, %zmm3, %zmm15
	vpsrld	$16, %zmm15, %zmm12
	vpaddd	%zmm13, %zmm12, %zmm5

	/* Assemble reduced argument from the pieces */
	/* zmm4 = high 32-bit word, zmm9 = low 32-bit word of the product;
	   zmm6 = high word >> 9.  */
	vpandd	%zmm6, %zmm14, %zmm9
	vpandd	%zmm6, %zmm15, %zmm7
	vpslld	$16, %zmm5, %zmm6
	vpslld	$16, %zmm8, %zmm5
	vpaddd	%zmm7, %zmm6, %zmm4
	vpaddd	%zmm9, %zmm5, %zmm9
	vpsrld	$9, %zmm4, %zmm6

	/*
	 * We want to incorporate the original sign now too.
	 * Do it here for convenience in getting the right N value,
	 * though we could wait right to the end if we were prepared
	 * to modify the sign of N later too.
	 * So get the appropriate sign mask now (or sooner).
	 */
	/* zmm0 = sign bit of x; zmm14 = (low word & 0x0003ffff) << 5.  */
	vpandd	.FLT_16(%rip), %zmm11, %zmm0
	vpandd	.FLT_21(%rip), %zmm9, %zmm13
	vpslld	$5, %zmm13, %zmm14

	/*
	 * Create floating-point high part, implicitly adding integer bit 1
	 * Incorporate overall sign at this stage too.
	 */
	/* zmm2 = high part; zmm12 = high part + shifter; zmm3 = the
	   rounded value with the shifter removed; zmm7 = high part - zmm3.  */
	vpxord	.FLT_17(%rip), %zmm0, %zmm8
	vpord	%zmm8, %zmm6, %zmm2
	vaddps	{rn-sae}, %zmm2, %zmm1, %zmm12
	vsubps	{rn-sae}, %zmm1, %zmm12, %zmm3
	vsubps	{rn-sae}, %zmm3, %zmm2, %zmm7

	/*
	 * Create floating-point low and medium parts, respectively
	 * lo_17, ... lo_0, 0, ..., 0
	 * hi_8, ... hi_0, lo_31, ..., lo_18
	 * then subtract off the implicitly added integer bits,
	 * 2^-46 and 2^-23, respectively.
	 * Put the original sign into all of them at this stage.
	 */
	/* zmm8 = low part; zmm15 = |x| (for the tiny-input test below);
	   zmm6 = low word >> 18.  */
	vpxord	.FLT_20(%rip), %zmm0, %zmm6
	vpord	%zmm6, %zmm14, %zmm15
	vpandd	.FLT_23(%rip), %zmm4, %zmm4
	vsubps	{rn-sae}, %zmm6, %zmm15, %zmm8
	vandps	.FLT_26(%rip), %zmm11, %zmm15
	vpsrld	$18, %zmm9, %zmm6

	/*
	 * If the magnitude of the input is <= 2^-20, then
	 * just pass through the input, since no reduction will be needed and
	 * the main path will only work accurately if the reduced argument is
	 * about >= 2^-40 (which it is for all large pi multiples)
	 */
	vmovups	.FLT_27(%rip), %zmm14
	/* k4 = lanes where !(|x| > 2^-20)  (predicate 26 = NGT_UQ);
	   k5 = lanes where !(|x| <= 2^-20) (predicate 22 = NLE_UQ).  */
	vcmpps	$26, {sae}, %zmm14, %zmm15, %k4
	vcmpps	$22, {sae}, %zmm14, %zmm15, %k5
	/* zmm2 = medium part.  */
	vpxord	.FLT_22(%rip), %zmm0, %zmm1
	vpslld	$14, %zmm4, %zmm0
	vpord	%zmm6, %zmm0, %zmm0
	vpord	%zmm1, %zmm0, %zmm4
	vsubps	{rn-sae}, %zmm1, %zmm4, %zmm2
	/* zmm6 = all ones (seed for the select masks built below).  */
	vpternlogd $255, %zmm6, %zmm6, %zmm6

	/* Now add them up into 2 reasonably aligned pieces */
	vaddps	{rn-sae}, %zmm2, %zmm7, %zmm13
	vsubps	{rn-sae}, %zmm13, %zmm7, %zmm7
	vaddps	{rn-sae}, %zmm7, %zmm2, %zmm3

	/*
	 * The output is _VRES_R (high) + _VRES_E (low), and the integer part is _VRES_IND
	 * Set sRp2 = _VRES_R^2 and then resume the original code.
	 */
	/* zmm2 = integer 2 in every lane; zmm1 = low piece (RLo);
	   zmm8 = .FLT_25 (pi_trail in the product below).  */
	vmovups	.FLT_28(%rip), %zmm2
	vaddps	{rn-sae}, %zmm8, %zmm3, %zmm1
	vmovups	.FLT_25(%rip), %zmm8

	/* Grab our final N value as an integer, appropriately masked mod 2^8 */
	vpandd	.FLT_19(%rip), %zmm12, %zmm5

	/*
	 * Now multiply those numbers all by 2 pi, reasonably accurately.
	 * (RHi + RLo) * (pi_lead + pi_trail) ~=
	 * RHi * pi_lead + (RHi * pi_trail + RLo * pi_lead)
	 */
	/* zmm0 = RHi * pi_lead (rounded)
	   zmm9 = RHi * pi_lead - zmm0 (rounding error of that product)
	   zmm13 = RHi * pi_trail + zmm9
	   zmm1 = RLo * pi_lead + zmm13.  */
	vmovups	.FLT_24(%rip), %zmm12
	vmulps	{rn-sae}, %zmm12, %zmm13, %zmm0
	vmovaps	%zmm12, %zmm9
	vfmsub213ps {rn-sae}, %zmm0, %zmm13, %zmm9
	vfmadd213ps {rn-sae}, %zmm9, %zmm8, %zmm13
	vmovaps	%zmm6, %zmm8
	vfmadd213ps {rn-sae}, %zmm13, %zmm12, %zmm1
	/* Select masks: zmm8 is zeroed in the k4 lanes and zmm6 in the
	   k5 lanes (x AND NOT x = 0); the other lanes stay all ones.  */
	vpandnd	%zmm15, %zmm15, %zmm8{%k4}
	vpandnd	%zmm15, %zmm15, %zmm6{%k5}
	/* zmm6 = x where zmm6 was left set, OR the reduced high part where
	   zmm8 was left set; zmm12 = reduced low part under zmm8.  */
	vandps	%zmm11, %zmm6, %zmm14
	vandps	%zmm0, %zmm8, %zmm15
	vandps	%zmm1, %zmm8, %zmm12
	vorps	%zmm15, %zmm14, %zmm6
	/* zmm13 = (N + 2 - sign bit of zmm6) >> 2; zmm9 = zmm13 << 2.  */
	vpsrld	$31, %zmm6, %zmm3
	vpsubd	%zmm3, %zmm2, %zmm4
	vpaddd	%zmm4, %zmm5, %zmm7
	vpsrld	$2, %zmm7, %zmm13
	vpslld	$2, %zmm13, %zmm9

	/*
	 *
	 * End of large arguments path
	 *
	 * Merge results from main and large paths:
	 */
	/* zmm10 = k6 ? zmm13 : main-path index.  */
	vblendmps %zmm13, %zmm10, %zmm10{%k6}
	/* zmm0 = float(N - zmm9), the part of N not absorbed by the index.  */
	vpsubd	%zmm9, %zmm5, %zmm5
	vmovups	.FLT_29(%rip), %zmm9
	vcvtdq2ps {rn-sae}, %zmm5, %zmm0
	vmovups	.FLT_30(%rip), %zmm5
	/* zmm12 += zmm0 * .FLT_30.  */
	vfmadd231ps {rn-sae}, %zmm0, %zmm5, %zmm12
	/* Reload the main-path reduced argument spilled on entry.  */
	vmovups	(%rsp), %zmm5
	/* zmm0 = zmm0 * .FLT_29 + (zmm12 + zmm6): large-path reduced argument.  */
	vaddps	{rn-sae}, %zmm6, %zmm12, %zmm6
	vfmadd213ps {rn-sae}, %zmm6, %zmm9, %zmm0
	/* zmm5 = k6 ? large-path reduced argument : main-path one.  */
	vblendmps %zmm0, %zmm5, %zmm5{%k6}

	/* Return to main vector processing path */
	jmp	L(AUX_BRANCH_RETURN)
	# LOE rbx r12 r13 r14 r15 edx zmm5 zmm10 zmm11
END(_ZGVeN16v_tanf_skx)

	.section .rodata, "a"
	.align	64

	/* Broadcast constants referenced by L(AUX_BRANCH).  Every object is
	   one 32-bit pattern repeated over 16 lanes (64 bytes) and is
	   followed by a 64-byte alignment request.  */

	/* Exponent-field mask.  */
.FLT_12:
	.rept	16
	.long	0x7f800000
	.endr
	.type	.FLT_12, @object
	.size	.FLT_12, 64
	.align	64

	/* Mantissa-field mask.  */
.FLT_13:
	.rept	16
	.long	0x007fffff
	.endr
	.type	.FLT_13, @object
	.size	.FLT_13, 64
	.align	64

	/* Integer bit added on top of the masked mantissa.  */
.FLT_14:
	.rept	16
	.long	0x00800000
	.endr
	.type	.FLT_14, @object
	.size	.FLT_14, 64
	.align	64

	/* Low 16-bit mask used to split words into 16-bit chunks.  */
.FLT_15:
	.rept	16
	.long	0x0000ffff
	.endr
	.type	.FLT_15, @object
	.size	.FLT_15, 64
	.align	64

	/* Sign-bit mask.  */
.FLT_16:
	.rept	16
	.long	0x80000000
	.endr
	.type	.FLT_16, @object
	.size	.FLT_16, 64
	.align	64

	/* Bit pattern of 1.0f.  */
.FLT_17:
	.rept	16
	.long	0x3f800000
	.endr
	.type	.FLT_17, @object
	.size	.FLT_17, 64
	.align	64

	/* Rounding shifter 2^15 + 2^14.  */
.FLT_18:
	.rept	16
	.long	0x47400000
	.endr
	.type	.FLT_18, @object
	.size	.FLT_18, 64
	.align	64

	/* Low 8-bit mask (N mod 2^8).  */
.FLT_19:
	.rept	16
	.long	0x000000ff
	.endr
	.type	.FLT_19, @object
	.size	.FLT_19, 64
	.align	64

	/* Bit pattern of 2^-46.  */
.FLT_20:
	.rept	16
	.long	0x28800000
	.endr
	.type	.FLT_20, @object
	.size	.FLT_20, 64
	.align	64

	/* Low 18-bit mask.  */
.FLT_21:
	.rept	16
	.long	0x0003ffff
	.endr
	.type	.FLT_21, @object
	.size	.FLT_21, 64
	.align	64

	/* Bit pattern of 2^-23.  */
.FLT_22:
	.rept	16
	.long	0x34000000
	.endr
	.type	.FLT_22, @object
	.size	.FLT_22, 64
	.align	64

	/* Low 9-bit mask.  */
.FLT_23:
	.rept	16
	.long	0x000001ff
	.endr
	.type	.FLT_23, @object
	.size	.FLT_23, 64
	.align	64

	/* Leading part of 2*pi.  */
.FLT_24:
	.rept	16
	.long	0x40c90fdb
	.endr
	.type	.FLT_24, @object
	.size	.FLT_24, 64
	.align	64

	/* Trailing part of 2*pi.  */
.FLT_25:
	.rept	16
	.long	0xb43bbd2e
	.endr
	.type	.FLT_25, @object
	.size	.FLT_25, 64
	.align	64

	/* Absolute-value mask.  */
.FLT_26:
	.rept	16
	.long	0x7fffffff
	.endr
	.type	.FLT_26, @object
	.size	.FLT_26, 64
	.align	64

	/* Bit pattern of 2^-20 (tiny-input threshold).  */
.FLT_27:
	.rept	16
	.long	0x35800000
	.endr
	.type	.FLT_27, @object
	.size	.FLT_27, 64
	.align	64

	/* Integer 2.  */
.FLT_28:
	.rept	16
	.long	0x00000002
	.endr
	.type	.FLT_28, @object
	.size	.FLT_28, 64
	.align	64

	/* Leading part of pi/128.  */
.FLT_29:
	.rept	16
	.long	0x3cc90fdb
	.endr
	.type	.FLT_29, @object
	.size	.FLT_29, 64
	.align	64

	/* Trailing part of pi/128.  */
.FLT_30:
	.rept	16
	.long	0xb03bbd2e
	.endr
	.type	.FLT_30, @object
	.size	.FLT_30, 64
	.align	64

/* Constant table for the vector tanf kernel.  Fields appear in the
   order of the offset macros defined near the top of this file; every
   field starts on a 64-byte boundary.  The typedef below is compiled
   only if __svml_stan_data_internal_typedef is defined.  */
#ifdef __svml_stan_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
	__declspec(align(64)) VUINT32 _sInvPI_uisa[16][1];
	__declspec(align(64)) VUINT32 _sPI1_uisa[16][1];
	__declspec(align(64)) VUINT32 _sPI2_uisa[16][1];
	__declspec(align(64)) VUINT32 _sPI3_uisa[16][1];
	__declspec(align(64)) VUINT32 Th_tbl_uisa[32][1];
	__declspec(align(64)) VUINT32 _sPC3_uisa[16][1];
	__declspec(align(64)) VUINT32 _sPC5_uisa[16][1];
	__declspec(align(64)) VUINT32 _sRangeReductionVal_uisa[16][1];
	__declspec(align(64)) VUINT32 _sAbsMask[16][1];
	__declspec(align(64)) VUINT32 _sRangeVal[16][1];
	__declspec(align(64)) VUINT32 _sRShifter[16][1];
	__declspec(align(64)) VUINT32 _sOne[16][1];
	__declspec(align(64)) VUINT32 _sRangeReductionVal[16][1];
	__declspec(align(64)) VUINT32 _sPI1[16][1];
	__declspec(align(64)) VUINT32 _sPI2[16][1];
	__declspec(align(64)) VUINT32 _sPI3[16][1];
} __svml_stan_data_internal;
#endif
__svml_stan_data_internal:
	/* UISA */
	/* _sInvPI_uisa (offset 0) */
	.rept	16
	.long	0x4122f983
	.endr
	.align	64
	/* _sPI1_uisa (offset 64) */
	.rept	16
	.long	0x3dc90fda
	.endr
	.align	64
	/* _sPI2_uisa (offset 128) */
	.rept	16
	.long	0x31a22168
	.endr
	.align	64
	/* _sPI3_uisa (offset 192) */
	.rept	16
	.long	0x25c234c5
	.endr
	/* Th_tbl_uisa (offset 256): for i from 0 to 31 do printsingle(tan(i*Pi/32));
	   entry 0 is stored as 0x80000000 and entry 16 as 0xff7fffff.  */
	.align	64
	.long	0x80000000, 0x3dc9b5dc, 0x3e4bafaf, 0x3e9b5042
	.long	0x3ed413cd, 0x3f08d5b9, 0x3f2b0dc1, 0x3f521801
	.long	0x3f800000, 0x3f9bf7ec, 0x3fbf90c7, 0x3fef789e
	.long	0x401a827a, 0x4052facf, 0x40a0dff7, 0x41227363
	.long	0xff7fffff, 0xc1227363, 0xc0a0dff7, 0xc052facf
	.long	0xc01a827a, 0xbfef789e, 0xbfbf90c7, 0xbf9bf7ec
	.long	0xbf800000, 0xbf521801, 0xbf2b0dc1, 0xbf08d5b9
	.long	0xbed413cd, 0xbe9b5042, 0xbe4bafaf, 0xbdc9b5dc
	.align	64
	/* _sPC3_uisa (offset 384) */
	.rept	16
	.long	0x3eaaaaa6
	.endr
	.align	64
	/* _sPC5_uisa (offset 448) */
	.rept	16
	.long	0x3e08b888
	.endr
	.align	64
	/* _sRangeReductionVal_uisa (offset 512) */
	.rept	16
	.long	0x46010000
	.endr
	.align	64
	/* _sAbsMask (offset 576) */
	.rept	16
	.long	0x7FFFFFFF
	.endr
	.align	64
	/* _sRangeVal (offset 640) */
	.rept	16
	.long	0x7f800000
	.endr
	.align	64
	/* _sRShifter (offset 704) */
	.rept	16
	.long	0x4B400000
	.endr
	.align	64
	/* _sOne (offset 768) */
	.rept	16
	.long	0x3f800000
	.endr
	.align	64
	/* _sRangeReductionVal (offset 832) */
	.rept	16
	.long	0x46010000
	.endr
	.align	64
	/* _sPI1 (offset 896) */
	.rept	16
	.long	0x3FC90000
	.endr
	.align	64
	/* _sPI2 (offset 960) */
	.rept	16
	.long	0x39FDA000
	.endr
	.align	64
	/* _sPI3 (offset 1024) */
	.rept	16
	.long	0x33A22000
	.endr
	.align	64
	.type	__svml_stan_data_internal, @object
	.size	__svml_stan_data_internal, .-__svml_stan_data_internal
	.align	64

/* C-style description of the layout of the table that follows: one member,
   _sPtable, declared as 256 x 3 x 1 32-bit unsigned words with a 64-byte
   alignment request.  The preprocessor only lets these lines through when
   __svml_stan_reduction_data_internal_typedef is defined.
   NOTE(review): nothing in this part of the file defines that macro, so
   the block presumably serves as documentation only -- confirm.  */
#ifdef __svml_stan_reduction_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
	__declspec(align(64)) VUINT32 _sPtable[256][3][1];
} __svml_stan_reduction_data_internal;
#endif
/* Reduction table: 256 rows, numbered 0-255 in the trailing comments, each
   made of three 32-bit words (P_hi, P_med, P_lo) that together form one
   96-bit window, most significant word first.

   Row N+1 is row N shifted left by one bit: the top bit of P_hi is
   dropped and the next bit of the underlying bit stream enters at the
   bottom of P_lo.  Rows 0-56 are all zero, row 57 holds the first set bit,
   and row 152 begins 0xA2F9836E, 0x4E441529, which are the leading
   fraction bits of 2/pi written in hex.
   NOTE(review): the row index is presumably derived from the biased
   exponent of the argument by the code that reads this table; that code
   is not in this part of the file -- confirm there.

   Size: 256 rows * 12 bytes = 3072 bytes, a multiple of 64, so the
   trailing .align 64 adds no padding when the table itself starts on a
   64-byte boundary.  */
__svml_stan_reduction_data_internal:
	/*     P_hi                  P_med               P_lo                */
	.long	0x00000000, 0x00000000, 0x00000000 /* 0 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 1 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 2 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 3 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 4 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 5 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 6 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 7 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 8 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 9 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 10 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 11 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 12 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 13 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 14 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 15 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 16 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 17 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 18 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 19 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 20 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 21 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 22 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 23 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 24 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 25 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 26 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 27 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 28 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 29 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 30 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 31 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 32 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 33 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 34 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 35 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 36 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 37 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 38 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 39 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 40 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 41 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 42 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 43 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 44 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 45 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 46 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 47 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 48 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 49 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 50 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 51 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 52 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 53 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 54 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 55 */
	.long	0x00000000, 0x00000000, 0x00000000 /* 56 */
	.long	0x00000000, 0x00000000, 0x00000001 /* 57 */
	.long	0x00000000, 0x00000000, 0x00000002 /* 58 */
	.long	0x00000000, 0x00000000, 0x00000005 /* 59 */
	.long	0x00000000, 0x00000000, 0x0000000A /* 60 */
	.long	0x00000000, 0x00000000, 0x00000014 /* 61 */
	.long	0x00000000, 0x00000000, 0x00000028 /* 62 */
	.long	0x00000000, 0x00000000, 0x00000051 /* 63 */
	.long	0x00000000, 0x00000000, 0x000000A2 /* 64 */
	.long	0x00000000, 0x00000000, 0x00000145 /* 65 */
	.long	0x00000000, 0x00000000, 0x0000028B /* 66 */
	.long	0x00000000, 0x00000000, 0x00000517 /* 67 */
	.long	0x00000000, 0x00000000, 0x00000A2F /* 68 */
	.long	0x00000000, 0x00000000, 0x0000145F /* 69 */
	.long	0x00000000, 0x00000000, 0x000028BE /* 70 */
	.long	0x00000000, 0x00000000, 0x0000517C /* 71 */
	.long	0x00000000, 0x00000000, 0x0000A2F9 /* 72 */
	.long	0x00000000, 0x00000000, 0x000145F3 /* 73 */
	.long	0x00000000, 0x00000000, 0x00028BE6 /* 74 */
	.long	0x00000000, 0x00000000, 0x000517CC /* 75 */
	.long	0x00000000, 0x00000000, 0x000A2F98 /* 76 */
	.long	0x00000000, 0x00000000, 0x00145F30 /* 77 */
	.long	0x00000000, 0x00000000, 0x0028BE60 /* 78 */
	.long	0x00000000, 0x00000000, 0x00517CC1 /* 79 */
	.long	0x00000000, 0x00000000, 0x00A2F983 /* 80 */
	.long	0x00000000, 0x00000000, 0x0145F306 /* 81 */
	.long	0x00000000, 0x00000000, 0x028BE60D /* 82 */
	.long	0x00000000, 0x00000000, 0x0517CC1B /* 83 */
	.long	0x00000000, 0x00000000, 0x0A2F9836 /* 84 */
	.long	0x00000000, 0x00000000, 0x145F306D /* 85 */
	.long	0x00000000, 0x00000000, 0x28BE60DB /* 86 */
	.long	0x00000000, 0x00000000, 0x517CC1B7 /* 87 */
	.long	0x00000000, 0x00000000, 0xA2F9836E /* 88 */
	.long	0x00000000, 0x00000001, 0x45F306DC /* 89 */
	.long	0x00000000, 0x00000002, 0x8BE60DB9 /* 90 */
	.long	0x00000000, 0x00000005, 0x17CC1B72 /* 91 */
	.long	0x00000000, 0x0000000A, 0x2F9836E4 /* 92 */
	.long	0x00000000, 0x00000014, 0x5F306DC9 /* 93 */
	.long	0x00000000, 0x00000028, 0xBE60DB93 /* 94 */
	.long	0x00000000, 0x00000051, 0x7CC1B727 /* 95 */
	.long	0x00000000, 0x000000A2, 0xF9836E4E /* 96 */
	.long	0x00000000, 0x00000145, 0xF306DC9C /* 97 */
	.long	0x00000000, 0x0000028B, 0xE60DB939 /* 98 */
	.long	0x00000000, 0x00000517, 0xCC1B7272 /* 99 */
	.long	0x00000000, 0x00000A2F, 0x9836E4E4 /* 100 */
	.long	0x00000000, 0x0000145F, 0x306DC9C8 /* 101 */
	.long	0x00000000, 0x000028BE, 0x60DB9391 /* 102 */
	.long	0x00000000, 0x0000517C, 0xC1B72722 /* 103 */
	.long	0x00000000, 0x0000A2F9, 0x836E4E44 /* 104 */
	.long	0x00000000, 0x000145F3, 0x06DC9C88 /* 105 */
	.long	0x00000000, 0x00028BE6, 0x0DB93910 /* 106 */
	.long	0x00000000, 0x000517CC, 0x1B727220 /* 107 */
	.long	0x00000000, 0x000A2F98, 0x36E4E441 /* 108 */
	.long	0x00000000, 0x00145F30, 0x6DC9C882 /* 109 */
	.long	0x00000000, 0x0028BE60, 0xDB939105 /* 110 */
	.long	0x00000000, 0x00517CC1, 0xB727220A /* 111 */
	.long	0x00000000, 0x00A2F983, 0x6E4E4415 /* 112 */
	.long	0x00000000, 0x0145F306, 0xDC9C882A /* 113 */
	.long	0x00000000, 0x028BE60D, 0xB9391054 /* 114 */
	.long	0x00000000, 0x0517CC1B, 0x727220A9 /* 115 */
	.long	0x00000000, 0x0A2F9836, 0xE4E44152 /* 116 */
	.long	0x00000000, 0x145F306D, 0xC9C882A5 /* 117 */
	.long	0x00000000, 0x28BE60DB, 0x9391054A /* 118 */
	.long	0x00000000, 0x517CC1B7, 0x27220A94 /* 119 */
	.long	0x00000000, 0xA2F9836E, 0x4E441529 /* 120 */
	.long	0x00000001, 0x45F306DC, 0x9C882A53 /* 121 */
	.long	0x00000002, 0x8BE60DB9, 0x391054A7 /* 122 */
	.long	0x00000005, 0x17CC1B72, 0x7220A94F /* 123 */
	.long	0x0000000A, 0x2F9836E4, 0xE441529F /* 124 */
	.long	0x00000014, 0x5F306DC9, 0xC882A53F /* 125 */
	.long	0x00000028, 0xBE60DB93, 0x91054A7F /* 126 */
	.long	0x00000051, 0x7CC1B727, 0x220A94FE /* 127 */
	.long	0x000000A2, 0xF9836E4E, 0x441529FC /* 128 */
	.long	0x00000145, 0xF306DC9C, 0x882A53F8 /* 129 */
	.long	0x0000028B, 0xE60DB939, 0x1054A7F0 /* 130 */
	.long	0x00000517, 0xCC1B7272, 0x20A94FE1 /* 131 */
	.long	0x00000A2F, 0x9836E4E4, 0x41529FC2 /* 132 */
	.long	0x0000145F, 0x306DC9C8, 0x82A53F84 /* 133 */
	.long	0x000028BE, 0x60DB9391, 0x054A7F09 /* 134 */
	.long	0x0000517C, 0xC1B72722, 0x0A94FE13 /* 135 */
	.long	0x0000A2F9, 0x836E4E44, 0x1529FC27 /* 136 */
	.long	0x000145F3, 0x06DC9C88, 0x2A53F84E /* 137 */
	.long	0x00028BE6, 0x0DB93910, 0x54A7F09D /* 138 */
	.long	0x000517CC, 0x1B727220, 0xA94FE13A /* 139 */
	.long	0x000A2F98, 0x36E4E441, 0x529FC275 /* 140 */
	.long	0x00145F30, 0x6DC9C882, 0xA53F84EA /* 141 */
	.long	0x0028BE60, 0xDB939105, 0x4A7F09D5 /* 142 */
	.long	0x00517CC1, 0xB727220A, 0x94FE13AB /* 143 */
	.long	0x00A2F983, 0x6E4E4415, 0x29FC2757 /* 144 */
	.long	0x0145F306, 0xDC9C882A, 0x53F84EAF /* 145 */
	.long	0x028BE60D, 0xB9391054, 0xA7F09D5F /* 146 */
	.long	0x0517CC1B, 0x727220A9, 0x4FE13ABE /* 147 */
	.long	0x0A2F9836, 0xE4E44152, 0x9FC2757D /* 148 */
	.long	0x145F306D, 0xC9C882A5, 0x3F84EAFA /* 149 */
	.long	0x28BE60DB, 0x9391054A, 0x7F09D5F4 /* 150 */
	.long	0x517CC1B7, 0x27220A94, 0xFE13ABE8 /* 151 */
	.long	0xA2F9836E, 0x4E441529, 0xFC2757D1 /* 152 */
	.long	0x45F306DC, 0x9C882A53, 0xF84EAFA3 /* 153 */
	.long	0x8BE60DB9, 0x391054A7, 0xF09D5F47 /* 154 */
	.long	0x17CC1B72, 0x7220A94F, 0xE13ABE8F /* 155 */
	.long	0x2F9836E4, 0xE441529F, 0xC2757D1F /* 156 */
	.long	0x5F306DC9, 0xC882A53F, 0x84EAFA3E /* 157 */
	.long	0xBE60DB93, 0x91054A7F, 0x09D5F47D /* 158 */
	.long	0x7CC1B727, 0x220A94FE, 0x13ABE8FA /* 159 */
	.long	0xF9836E4E, 0x441529FC, 0x2757D1F5 /* 160 */
	.long	0xF306DC9C, 0x882A53F8, 0x4EAFA3EA /* 161 */
	.long	0xE60DB939, 0x1054A7F0, 0x9D5F47D4 /* 162 */
	.long	0xCC1B7272, 0x20A94FE1, 0x3ABE8FA9 /* 163 */
	.long	0x9836E4E4, 0x41529FC2, 0x757D1F53 /* 164 */
	.long	0x306DC9C8, 0x82A53F84, 0xEAFA3EA6 /* 165 */
	.long	0x60DB9391, 0x054A7F09, 0xD5F47D4D /* 166 */
	.long	0xC1B72722, 0x0A94FE13, 0xABE8FA9A /* 167 */
	.long	0x836E4E44, 0x1529FC27, 0x57D1F534 /* 168 */
	.long	0x06DC9C88, 0x2A53F84E, 0xAFA3EA69 /* 169 */
	.long	0x0DB93910, 0x54A7F09D, 0x5F47D4D3 /* 170 */
	.long	0x1B727220, 0xA94FE13A, 0xBE8FA9A6 /* 171 */
	.long	0x36E4E441, 0x529FC275, 0x7D1F534D /* 172 */
	.long	0x6DC9C882, 0xA53F84EA, 0xFA3EA69B /* 173 */
	.long	0xDB939105, 0x4A7F09D5, 0xF47D4D37 /* 174 */
	.long	0xB727220A, 0x94FE13AB, 0xE8FA9A6E /* 175 */
	.long	0x6E4E4415, 0x29FC2757, 0xD1F534DD /* 176 */
	.long	0xDC9C882A, 0x53F84EAF, 0xA3EA69BB /* 177 */
	.long	0xB9391054, 0xA7F09D5F, 0x47D4D377 /* 178 */
	.long	0x727220A9, 0x4FE13ABE, 0x8FA9A6EE /* 179 */
	.long	0xE4E44152, 0x9FC2757D, 0x1F534DDC /* 180 */
	.long	0xC9C882A5, 0x3F84EAFA, 0x3EA69BB8 /* 181 */
	.long	0x9391054A, 0x7F09D5F4, 0x7D4D3770 /* 182 */
	.long	0x27220A94, 0xFE13ABE8, 0xFA9A6EE0 /* 183 */
	.long	0x4E441529, 0xFC2757D1, 0xF534DDC0 /* 184 */
	.long	0x9C882A53, 0xF84EAFA3, 0xEA69BB81 /* 185 */
	.long	0x391054A7, 0xF09D5F47, 0xD4D37703 /* 186 */
	.long	0x7220A94F, 0xE13ABE8F, 0xA9A6EE06 /* 187 */
	.long	0xE441529F, 0xC2757D1F, 0x534DDC0D /* 188 */
	.long	0xC882A53F, 0x84EAFA3E, 0xA69BB81B /* 189 */
	.long	0x91054A7F, 0x09D5F47D, 0x4D377036 /* 190 */
	.long	0x220A94FE, 0x13ABE8FA, 0x9A6EE06D /* 191 */
	.long	0x441529FC, 0x2757D1F5, 0x34DDC0DB /* 192 */
	.long	0x882A53F8, 0x4EAFA3EA, 0x69BB81B6 /* 193 */
	.long	0x1054A7F0, 0x9D5F47D4, 0xD377036D /* 194 */
	.long	0x20A94FE1, 0x3ABE8FA9, 0xA6EE06DB /* 195 */
	.long	0x41529FC2, 0x757D1F53, 0x4DDC0DB6 /* 196 */
	.long	0x82A53F84, 0xEAFA3EA6, 0x9BB81B6C /* 197 */
	.long	0x054A7F09, 0xD5F47D4D, 0x377036D8 /* 198 */
	.long	0x0A94FE13, 0xABE8FA9A, 0x6EE06DB1 /* 199 */
	.long	0x1529FC27, 0x57D1F534, 0xDDC0DB62 /* 200 */
	.long	0x2A53F84E, 0xAFA3EA69, 0xBB81B6C5 /* 201 */
	.long	0x54A7F09D, 0x5F47D4D3, 0x77036D8A /* 202 */
	.long	0xA94FE13A, 0xBE8FA9A6, 0xEE06DB14 /* 203 */
	.long	0x529FC275, 0x7D1F534D, 0xDC0DB629 /* 204 */
	.long	0xA53F84EA, 0xFA3EA69B, 0xB81B6C52 /* 205 */
	.long	0x4A7F09D5, 0xF47D4D37, 0x7036D8A5 /* 206 */
	.long	0x94FE13AB, 0xE8FA9A6E, 0xE06DB14A /* 207 */
	.long	0x29FC2757, 0xD1F534DD, 0xC0DB6295 /* 208 */
	.long	0x53F84EAF, 0xA3EA69BB, 0x81B6C52B /* 209 */
	.long	0xA7F09D5F, 0x47D4D377, 0x036D8A56 /* 210 */
	.long	0x4FE13ABE, 0x8FA9A6EE, 0x06DB14AC /* 211 */
	.long	0x9FC2757D, 0x1F534DDC, 0x0DB62959 /* 212 */
	.long	0x3F84EAFA, 0x3EA69BB8, 0x1B6C52B3 /* 213 */
	.long	0x7F09D5F4, 0x7D4D3770, 0x36D8A566 /* 214 */
	.long	0xFE13ABE8, 0xFA9A6EE0, 0x6DB14ACC /* 215 */
	.long	0xFC2757D1, 0xF534DDC0, 0xDB629599 /* 216 */
	.long	0xF84EAFA3, 0xEA69BB81, 0xB6C52B32 /* 217 */
	.long	0xF09D5F47, 0xD4D37703, 0x6D8A5664 /* 218 */
	.long	0xE13ABE8F, 0xA9A6EE06, 0xDB14ACC9 /* 219 */
	.long	0xC2757D1F, 0x534DDC0D, 0xB6295993 /* 220 */
	.long	0x84EAFA3E, 0xA69BB81B, 0x6C52B327 /* 221 */
	.long	0x09D5F47D, 0x4D377036, 0xD8A5664F /* 222 */
	.long	0x13ABE8FA, 0x9A6EE06D, 0xB14ACC9E /* 223 */
	.long	0x2757D1F5, 0x34DDC0DB, 0x6295993C /* 224 */
	.long	0x4EAFA3EA, 0x69BB81B6, 0xC52B3278 /* 225 */
	.long	0x9D5F47D4, 0xD377036D, 0x8A5664F1 /* 226 */
	.long	0x3ABE8FA9, 0xA6EE06DB, 0x14ACC9E2 /* 227 */
	.long	0x757D1F53, 0x4DDC0DB6, 0x295993C4 /* 228 */
	.long	0xEAFA3EA6, 0x9BB81B6C, 0x52B32788 /* 229 */
	.long	0xD5F47D4D, 0x377036D8, 0xA5664F10 /* 230 */
	.long	0xABE8FA9A, 0x6EE06DB1, 0x4ACC9E21 /* 231 */
	.long	0x57D1F534, 0xDDC0DB62, 0x95993C43 /* 232 */
	.long	0xAFA3EA69, 0xBB81B6C5, 0x2B327887 /* 233 */
	.long	0x5F47D4D3, 0x77036D8A, 0x5664F10E /* 234 */
	.long	0xBE8FA9A6, 0xEE06DB14, 0xACC9E21C /* 235 */
	.long	0x7D1F534D, 0xDC0DB629, 0x5993C439 /* 236 */
	.long	0xFA3EA69B, 0xB81B6C52, 0xB3278872 /* 237 */
	.long	0xF47D4D37, 0x7036D8A5, 0x664F10E4 /* 238 */
	.long	0xE8FA9A6E, 0xE06DB14A, 0xCC9E21C8 /* 239 */
	.long	0xD1F534DD, 0xC0DB6295, 0x993C4390 /* 240 */
	.long	0xA3EA69BB, 0x81B6C52B, 0x32788720 /* 241 */
	.long	0x47D4D377, 0x036D8A56, 0x64F10E41 /* 242 */
	.long	0x8FA9A6EE, 0x06DB14AC, 0xC9E21C82 /* 243 */
	.long	0x1F534DDC, 0x0DB62959, 0x93C43904 /* 244 */
	.long	0x3EA69BB8, 0x1B6C52B3, 0x27887208 /* 245 */
	.long	0x7D4D3770, 0x36D8A566, 0x4F10E410 /* 246 */
	.long	0xFA9A6EE0, 0x6DB14ACC, 0x9E21C820 /* 247 */
	.long	0xF534DDC0, 0xDB629599, 0x3C439041 /* 248 */
	.long	0xEA69BB81, 0xB6C52B32, 0x78872083 /* 249 */
	.long	0xD4D37703, 0x6D8A5664, 0xF10E4107 /* 250 */
	.long	0xA9A6EE06, 0xDB14ACC9, 0xE21C820F /* 251 */
	.long	0x534DDC0D, 0xB6295993, 0xC439041F /* 252 */
	.long	0xA69BB81B, 0x6C52B327, 0x8872083F /* 253 */
	.long	0x4D377036, 0xD8A5664F, 0x10E4107F /* 254 */
	.long	0x9A6EE06D, 0xB14ACC9E, 0x21C820FF /* 255 */
	.align	64
	/* Mark the symbol as a data object.  The size is taken from the label
	   to the current location, i.e. after the .align above.  */
	.type	__svml_stan_reduction_data_internal, @object
	.size	__svml_stan_reduction_data_internal, .-__svml_stan_reduction_data_internal