1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
|
/* llroundl function.
IBM extended format long double version.
Copyright (C) 2004, 2006 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
#include <math_ldbl_opt.h>
/* Double-precision constants kept in the TOC.  Each .tc entry holds the
   raw 64-bit bit pattern of an IEEE double; the function below loads
   them with "lfd fpN,.LCn@toc(2)".  */
.section ".toc","aw"
.LC0: /* 0.0 (all bits zero) */
.tc FD_00000000_0[TC],0x0000000000000000
.LC1: /* 0.5 (biased exponent 0x3fe = 1023 - 1, zero fraction) */
.tc FD_3fe00000_0[TC],0x3fe0000000000000
.LC2: /* 2**52 (biased exponent 0x433 = 1023 + 52, zero fraction) */
.tc FD_43300000_0[TC],0x4330000000000000
.LC3: /* 2**63 (biased exponent 0x43e = 1023 + 63, zero fraction) */
.tc FD_43E00000_0[TC],0x43e0000000000000
/* Switch back to the code section for the function body.  */
.section ".text"
/* long long [r3] llroundl (long double x [fp1,fp2])
IEEE 1003.1 llroundl function. IEEE specifies "round to the nearest
integer value, rounding halfway cases away from zero, regardless of
the current rounding mode." However, the PowerPC Architecture defines
"round to Nearest" as "Choose the best approximation. In case of a
tie, choose the one that is even (least significant bit 0)."
So we can't use the PowerPC "round to Nearest" mode. Instead we set
"round toward Zero" mode and round by adding +-0.5 before rounding
toward zero. The "Floating Convert To Integer Doubleword with round
toward zero" instruction handles the conversion, including the
overflow cases and signalling "Invalid Operation".
PowerPC64 long double uses the IBM extended format, which is
represented as two 64-bit floating point double values. The values are
non-overlapping, giving an effective precision of 106 bits. The first
double contains the high order bits of the mantissa and is always rounded
to represent a normal rounding of long double to double. Since the
long double value is the sum of the high and low values, the low double
may have the opposite sign to compensate for this rounding.
For long double there are four cases:
1) |x| < 2**52, all the integer bits are in the high double.
Round and convert the high double to long long.
2) 2**52 <= |x| < 2**63, still fits but needs bits from both doubles.
Round the low double, convert both, then sum the long long values.
3) |x| == 2**63, looks like an overflow but may not be one, due to
rounding of the high double.
See the description at label .L2.
4) |x| > 2**63, this will overflow the 64-bit signed integer.
Treat like case #1. The conversion instruction will generate the
appropriate result and signal "invalid operation".
*/
/* __llroundl: convert an IBM extended (two-double) long double to a
   64-bit integer, rounding halfway cases away from zero.
   In:   fp1 = high double of x, fp2 = low double of x.
   Out:  r3  = converted value.
   Uses: fp0, fp3-fp5, fp7-fp13, r0, cr0, cr1, cr5, cr6, cr7, and the
         doublewords at -16(r1) and -8(r1) as scratch for moving
         integer results from the FPRs to the GPRs.
   Register roles that stay fixed after the prologue:
         fp7  = FPSCR image saved on entry (written back with mtfsf),
         fp10 = 0.5, fp13 = 2**52,
         cr6  = compare (x_high, 0.0).
   fp11 and fp12 start as 0.0 and 2**63 but are overwritten at .L9.  */
ENTRY (__llroundl)
mffs fp7 /* Save the FPSCR, including the current rounding mode. */
fabs fp0,fp1 /* fp0 = |x_high| */
lfd fp13,.LC2@toc(2) /* 2**52 */
lfd fp12,.LC3@toc(2) /* 2**63 */
lfd fp11,.LC0@toc(2) /* 0.0 */
lfd fp10,.LC1@toc(2) /* 0.5 */
fabs fp9,fp2 /* fp9 = |x_low| */
fcmpu cr7,fp0,fp13 /* cr7 = compare (|x_high|, 2**52) */
fcmpu cr6,fp1,fp11 /* cr6 = compare (x_high, 0.0) */
bnl- cr7,.L2 /* |x_high| not less than 2**52 (or unordered): go to .L2. */
/* Case 1: |x_high| < 2**52.  Only the high double is read on this path.
   NOTE(review): x_low (fp2) does not contribute here; confirm that is
   acceptable when x_high is an exact halfway value and x_low has the
   opposite sign.  */
mtfsfi 7,1 /* Set rounding mode toward 0. */
ble- cr6,.L1 /* x_high is not > 0.0: subtract 0.5 instead of adding. */
fadd fp9,fp1,fp10 /* fp9 = x_high + 0.5 */
b .L0
.L1:
fsub fp9,fp1,fp10 /* fp9 = x_high - 0.5 */
.L0:
/* Common tail: convert fp9 to a 64-bit integer and return it in r3.
   Also entered from .L2 (|x_high| > 2**63) and from the integer
   overflow path at the end of the function.  */
fctid fp0,fp9 /* Convert using the rounding mode currently in the FPSCR. */
stfd fp0,-16(r1) /* Pass the integer through memory to reach a GPR. */
mtfsf 0x01,fp7 /* Restore the previous rounding mode. */
nop /* Ensure the following load is in a different dispatch group */
nop /* to avoid a pipe stall on POWER4&5. */
nop
ld r3,-16(r1) /* r3 = converted value. */
blr
/* The high double is >= TWO52, so we need to round the low double and
   perhaps the high double.  In this case we have to round the low
   double and handle any adjustment to the high double that may be
   caused by rounding (up).  This is complicated by the fact that the
   high double may already be rounded and the low double may have the
   opposite sign to compensate.  This gets a bit tricky so we use the
   following algorithm (here "r1" is a value name, not the register):
   tau = trunc(x_high/TWO52);
   x0 = x_high - tau;
   x1 = x_low + tau;
   r1 = round(x1);
   y_high = x0 + r1;
   y_low = x0 - y_high + r1;
   return y;
   The code below does not form y_high/y_low in floating point: it
   converts x0 and the rounded x1 to integers separately and adds them
   with addo.  The tau adjustment is applied only when x_low is zero or
   has the opposite sign to x_high; otherwise x0 = x_high, x1 = x_low.  */
.L2:
fcmpu cr7,fp0,fp12 /* cr7 = compare (|x_high|, 2**63) */
fcmpu cr0,fp9,fp11 /* cr0 = compare (|x_low|, 0.0).  NOTE(review): no branch tests cr0 before addo. overwrites it below. */
fmr fp9,fp1 /* fp9 = x_high, the operand converted at .L0. */
fcmpu cr5,fp2,fp11 /* cr5 = compare (x_low, 0.0) */
bgt- cr7,.L0 /* |x_high| > 2**63: convert x_high directly at .L0. */
mtfsfi 7,1 /* Set rounding mode toward 0. */
fdiv fp8,fp1,fp13 /* fp8 = x_high/TWO52 */
bng- cr6,.L6 /* x_high is not > 0.0: take the .L6 path. */
/* x_high > 0.0 */
fctidz fp0,fp8
fcfid fp8,fp0 /* tau = trunc(x_high/TWO52); */
bng cr5,.L4 /* x_low is not > 0.0: apply tau at .L4. */
fmr fp3,fp1 /* x0 = x_high; */
fmr fp4,fp2 /* x1 = x_low; */
b .L5
.L4: /* x_low is not > 0.0 */
fsub fp3,fp1,fp8 /* x0 = x_high - tau; */
fadd fp4,fp2,fp8 /* x1 = x_low + tau; */
.L5:
fadd fp5,fp4,fp10 /* fp5 = x1 + 0.5; truncated to an integer at .L9. */
b .L9
.L6: /* x_high is not > 0.0 */
fctidz fp0,fp8
fcfid fp8,fp0 /* tau = trunc(x_high/TWO52); */
bnl cr5,.L7 /* x_low is not < 0.0: apply tau at .L7. */
fmr fp3,fp1 /* x0 = x_high; */
fmr fp4,fp2 /* x1 = x_low; */
b .L8
.L7: /* x_low is not < 0.0 */
fsub fp3,fp1,fp8 /* x0 = x_high - tau; */
fadd fp4,fp2,fp8 /* x1 = x_low + tau; */
.L8:
fsub fp5,fp4,fp10 /* fp5 = x1 - 0.5; truncated to an integer at .L9. */
.L9:
/* Convert both parts while round-toward-zero is still in effect.
   From here on fp11 and fp12 hold integer conversion results, not the
   0.0 and 2**63 constants loaded at entry.  */
fctid. fp11,fp3 /* fp11 = integer (x0); record form also updates cr1. */
fctid fp12,fp5 /* fp12 = integer (x1 +- 0.5) */
stfd fp11,-16(r1)
stfd fp12,-8(r1)
mtfsf 0x01,fp7 /* Restore the previous rounding mode. */
nop /* Ensure the following load is in a different dispatch group */
nop /* to avoid a pipe stall on POWER4&5. */
nop
ld r3,-16(r1) /* r3 = integer (x0) */
/* NOTE(review): bun branches when bit 3 of the CR field is SET, and
   after fctid. cr1 holds a copy of FPSCR exception summary bits.  The
   original comment on the next line read "if not overflow, return",
   which is the opposite branch sense; confirm which is intended.  */
bunlr cr1 /* Return integer (x0) alone when cr1 bit 3 is set. */
ld r0,-8(r1) /* r0 = integer (x1 +- 0.5) */
addo. r3,r3,r0 /* r3 = sum of both parts; records overflow in cr0. */
bnslr cr0 /* Return the sum unless summary overflow is set. */
/* Integer overflow path: hand a value back to .L0 for conversion.
   NOTE(review): fp12 here is the fctid result written at .L9, not the
   2**63 constant loaded at entry; confirm which value was intended.
   The rounding mode has already been restored by the mtfsf above.  */
fmr fp9,fp12
bng cr6,.L0 /* x_high is not > 0.0: convert fp12 as is. */
fneg fp9,fp12 /* x_high > 0.0: convert -fp12 instead. */
b .L0
END (__llroundl)
/* Publish the routine under its exported names.  strong_alias and
   long_double_symbol are macros defined outside this file (presumably
   reached through <sysdep.h> and <math_ldbl_opt.h> -- TODO confirm).
   __lroundl is made an alias of __llroundl, so both names run the same
   code; presumably long and long long are both 64 bits on this target
   -- confirm against the ABI.  */
strong_alias (__llroundl, __lroundl)
/* Bind the libm symbols llroundl and lroundl to the entry points above.  */
long_double_symbol (libm, __llroundl, llroundl)
long_double_symbol (libm, __lroundl, lroundl)
|