1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
|
/*
* mfwddct.c (derived from jfwddct.c, which carries the following info)
*
* Copyright (C) 1991, 1992, Thomas G. Lane. This file is part of the
* Independent JPEG Group's software. For conditions of distribution and use,
* see the accompanying README file.
*
* This file contains the basic DCT (Discrete Cosine Transform) transformation
* subroutine.
*
* This implementation is based on Appendix A.2 of the book "Discrete Cosine
* Transform---Algorithms, Advantages, Applications" by K.R. Rao and P. Yip
* (Academic Press, Inc, London, 1990). It uses scaled fixed-point arithmetic
* instead of floating point.
*/
#include "all.h"
#include "dct.h"
#include "mtypes.h"
#include "opts.h"
/*
* The poop on this scaling stuff is as follows:
*
* We have to do addition and subtraction of the integer inputs, which is no
* problem, and multiplication by fractional constants, which is a problem to
* do in integer arithmetic. We multiply all the constants by DCT_SCALE and
* convert them to integer constants (thus retaining LG2_DCT_SCALE bits of
* precision in the constants). After doing a multiplication we have to
* divide the product by DCT_SCALE, with proper rounding, to produce the
* correct output. The division can be implemented cheaply as a right shift
* of LG2_DCT_SCALE bits. The DCT equations also specify an additional
* division by 2 on the final outputs; this can be folded into the
* right-shift by shifting one more bit (see UNFIXH).
*
* If you are planning to recode this in assembler, you might want to set
* LG2_DCT_SCALE to 15. This loses a bit of precision, but then all the
* multiplications are between 16-bit quantities (given 8-bit JSAMPLEs!) so
* you could use a signed 16x16=>32 bit multiply instruction instead of full
* 32x32 multiply. Unfortunately there's no way to describe such a multiply
* portably in C, so we've gone for the extra bit of accuracy here.
*/
#define EIGHT_BIT_SAMPLES
#ifdef EIGHT_BIT_SAMPLES
#define LG2_DCT_SCALE 16
#else
#define LG2_DCT_SCALE 15 /* lose a little precision to avoid overflow */
#endif
#define ONE ((int32) 1)
#define DCT_SCALE (ONE << LG2_DCT_SCALE)
/* In some places we shift the inputs left by a couple more bits, */
/* so that they can be added to fractional results without too much */
/* loss of precision. */
#define LG2_OVERSCALE 2
#define OVERSCALE (ONE << LG2_OVERSCALE)
#define OVERSHIFT(x) ((x) <<= LG2_OVERSCALE)
/* Scale a fractional constant by DCT_SCALE */
#define FIX(x) ((int32) ((x) * DCT_SCALE + 0.5))
/* Scale a fractional constant by DCT_SCALE/OVERSCALE */
/* Such a constant can be multiplied with an overscaled input */
/* to produce something that's scaled by DCT_SCALE */
#define FIXO(x) ((int32) ((x) * DCT_SCALE / OVERSCALE + 0.5))
/* Descale and correctly round a value that's scaled by DCT_SCALE */
#define UNFIX(x) RIGHT_SHIFT((x) + (ONE << (LG2_DCT_SCALE-1)), LG2_DCT_SCALE)
/* Same with an additional division by 2, ie, correctly rounded UNFIX(x/2) */
#define UNFIXH(x) RIGHT_SHIFT((x) + (ONE << LG2_DCT_SCALE), LG2_DCT_SCALE+1)
/* Take a value scaled by DCT_SCALE and round to integer scaled by OVERSCALE */
#define UNFIXO(x) RIGHT_SHIFT((x) + (ONE << (LG2_DCT_SCALE-1-LG2_OVERSCALE)),\
LG2_DCT_SCALE-LG2_OVERSCALE)
/* Here are the constants we need */
/* SIN_i_j is sine of i*pi/j, scaled by DCT_SCALE */
/* COS_i_j is cosine of i*pi/j, scaled by DCT_SCALE */
#define SIN_1_4 FIX(0.707106781)
#define COS_1_4 SIN_1_4
#define SIN_1_8 FIX(0.382683432)
#define COS_1_8 FIX(0.923879533)
#define SIN_3_8 COS_1_8
#define COS_3_8 SIN_1_8
#define SIN_1_16 FIX(0.195090322)
#define COS_1_16 FIX(0.980785280)
#define SIN_7_16 COS_1_16
#define COS_7_16 SIN_1_16
#define SIN_3_16 FIX(0.555570233)
#define COS_3_16 FIX(0.831469612)
#define SIN_5_16 COS_3_16
#define COS_5_16 SIN_3_16
/* OSIN_i_j is sine of i*pi/j, scaled by DCT_SCALE/OVERSCALE */
/* OCOS_i_j is cosine of i*pi/j, scaled by DCT_SCALE/OVERSCALE */
#define OSIN_1_4 FIXO(0.707106781)
#define OCOS_1_4 OSIN_1_4
#define OSIN_1_8 FIXO(0.382683432)
#define OCOS_1_8 FIXO(0.923879533)
#define OSIN_3_8 OCOS_1_8
#define OCOS_3_8 OSIN_1_8
#define OSIN_1_16 FIXO(0.195090322)
#define OCOS_1_16 FIXO(0.980785280)
#define OSIN_7_16 OCOS_1_16
#define OCOS_7_16 OSIN_1_16
#define OSIN_3_16 FIXO(0.555570233)
#define OCOS_3_16 FIXO(0.831469612)
#define OSIN_5_16 OCOS_3_16
#define OCOS_5_16 OSIN_3_16
/* Prototypes */
void reference_fwd_dct _ANSI_ARGS_((Block block, Block dest));
void mp_fwd_dct_fast _ANSI_ARGS_((Block data2d, Block dest2d));
void init_fdct _ANSI_ARGS_((void));
/*
* --------------------------------------------------------------
*
* mp_fwd_dct_block2 --
*
* Select the appropriate mp_fwd_dct routine
*
* Results: None
*
* Side effects: None
*
* --------------------------------------------------------------
*/
extern boolean pureDCT;
void
mp_fwd_dct_block2(data, dest)
Block data, dest;
{
if (pureDCT) reference_fwd_dct(data, dest);
else mp_fwd_dct_fast(data, dest);
}
/*
* --------------------------------------------------------------
*
* mp_fwd_dct_fast --
*
* Perform the forward DCT on one block of samples.
*
* A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT on each
* column.
*
* Results: None
*
* Side effects: Overwrites the input data
*
* --------------------------------------------------------------
*/
void
mp_fwd_dct_fast(data2d, dest2d)
Block data2d, dest2d;
{
int16 *data = (int16 *) data2d; /* this algorithm wants
* a 1-d array */
int16 *dest = (int16 *) dest2d;
int pass, rowctr;
register int16 *inptr, *outptr;
int16 workspace[DCTSIZE_SQ];
SHIFT_TEMPS
#ifdef ndef
{
int y;
printf("fwd_dct (beforehand):\n");
for (y = 0; y < 8; y++)
printf("%4d %4d %4d %4d %4d %4d %4d %4d\n",
data2d[y][0], data2d[y][1],
data2d[y][2], data2d[y][3],
data2d[y][4], data2d[y][5],
data2d[y][6], data2d[y][7]);
}
#endif
/*
* Each iteration of the inner loop performs one 8-point 1-D DCT. It
* reads from a *row* of the input matrix and stores into a *column*
* of the output matrix. In the first pass, we read from the data[]
* array and store into the local workspace[]. In the second pass,
* we read from the workspace[] array and store into data[], thus
* performing the equivalent of a columnar DCT pass with no variable
* array indexing.
*/
inptr = data; /* initialize pointers for first pass */
outptr = workspace;
for (pass = 1; pass >= 0; pass--) {
for (rowctr = DCTSIZE - 1; rowctr >= 0; rowctr--) {
/*
* many tmps have nonoverlapping lifetime -- flashy
* register colorers should be able to do this lot
* very well
*/
int32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int32 tmp10, tmp11, tmp12, tmp13;
int32 tmp14, tmp15, tmp16, tmp17;
int32 tmp25, tmp26;
/* SHIFT_TEMPS */
/* temp0 through tmp7: -512 to +512 */
/* if I-block, then -256 to +256 */
tmp0 = inptr[7] + inptr[0];
tmp1 = inptr[6] + inptr[1];
tmp2 = inptr[5] + inptr[2];
tmp3 = inptr[4] + inptr[3];
tmp4 = inptr[3] - inptr[4];
tmp5 = inptr[2] - inptr[5];
tmp6 = inptr[1] - inptr[6];
tmp7 = inptr[0] - inptr[7];
/* tmp10 through tmp13: -1024 to +1024 */
/* if I-block, then -512 to +512 */
tmp10 = tmp3 + tmp0;
tmp11 = tmp2 + tmp1;
tmp12 = tmp1 - tmp2;
tmp13 = tmp0 - tmp3;
outptr[0] = (int16) UNFIXH((tmp10 + tmp11) * SIN_1_4);
outptr[DCTSIZE * 4] = (int16) UNFIXH((tmp10 - tmp11) * COS_1_4);
outptr[DCTSIZE * 2] = (int16) UNFIXH(tmp13 * COS_1_8 + tmp12 * SIN_1_8);
outptr[DCTSIZE * 6] = (int16) UNFIXH(tmp13 * SIN_1_8 - tmp12 * COS_1_8);
tmp16 = UNFIXO((tmp6 + tmp5) * SIN_1_4);
tmp15 = UNFIXO((tmp6 - tmp5) * COS_1_4);
OVERSHIFT(tmp4);
OVERSHIFT(tmp7);
/*
* tmp4, tmp7, tmp15, tmp16 are overscaled by
* OVERSCALE
*/
tmp14 = tmp4 + tmp15;
tmp25 = tmp4 - tmp15;
tmp26 = tmp7 - tmp16;
tmp17 = tmp7 + tmp16;
outptr[DCTSIZE] = (int16) UNFIXH(tmp17 * OCOS_1_16 + tmp14 * OSIN_1_16);
outptr[DCTSIZE * 7] = (int16) UNFIXH(tmp17 * OCOS_7_16 - tmp14 * OSIN_7_16);
outptr[DCTSIZE * 5] = (int16) UNFIXH(tmp26 * OCOS_5_16 + tmp25 * OSIN_5_16);
outptr[DCTSIZE * 3] = (int16) UNFIXH(tmp26 * OCOS_3_16 - tmp25 * OSIN_3_16);
inptr += DCTSIZE; /* advance inptr to next row */
outptr++; /* advance outptr to next column */
}
/* end of pass; in case it was pass 1, set up for pass 2 */
inptr = workspace;
outptr = dest;
}
#ifdef ndef
{
int y;
printf("fwd_dct (afterward):\n");
for (y = 0; y < 8; y++)
printf("%4d %4d %4d %4d %4d %4d %4d %4d\n",
dest2d[y][0], dest2d[y][1],
dest2d[y][2], dest2d[y][3],
dest2d[y][4], dest2d[y][5],
dest2d[y][6], dest2d[y][7]);
}
#endif
}
/* Modifies from the MPEG2 verification coder */
/* fdctref.c, forward discrete cosine transform, double precision */
/* Copyright (C) 1994, MPEG Software Simulation Group. All Rights Reserved. */
/*
* Disclaimer of Warranty
*
* These software programs are available to the user without any license fee or
* royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
* any and all warranties, whether express, implied, or statuary, including any
* implied warranties or merchantability or of fitness for a particular
* purpose. In no event shall the copyright-holder be liable for any
* incidental, punitive, or consequential damages of any kind whatsoever
* arising from the use of these programs.
*
* This disclaimer of warranty extends to the user of these programs and user's
* customers, employees, agents, transferees, successors, and assigns.
*
* The MPEG Software Simulation Group does not represent or warrant that the
* programs furnished hereunder are free of infringement of any third-party
* patents.
*
* Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
* are subject to royalty fees to patent holders. Many of these patents are
* general enough such that they are unavoidable regardless of implementation
* design.
*
*/
#ifndef PI
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
#endif
/* private data */
static double trans_coef[8][8]; /* transform coefficients */
void init_fdct()
{
int i, j;
double s;
for (i=0; i<8; i++)
{
s = (i==0) ? sqrt(0.125) : 0.5;
for (j=0; j<8; j++)
trans_coef[i][j] = s * cos((PI/8.0)*i*(j+0.5));
}
}
void reference_fwd_dct(block, dest)
Block block, dest;
{
int i, j, k;
double s;
double tmp[64];
if (DoLaplace) {
LaplaceNum++;
}
for (i=0; i<8; i++)
for (j=0; j<8; j++)
{
s = 0.0;
for (k=0; k<8; k++)
s += trans_coef[j][k] * block[i][k];
tmp[8*i+j] = s;
}
for (i=0; i<8; i++)
for (j=0; j<8; j++)
{
s = 0.0;
for (k=0; k<8; k++)
s += trans_coef[i][k] * tmp[8*k+j];
if (collect_quant) {
fprintf(collect_quant_fp, "%d %f\n", 8*i+j, s);
}
if (DoLaplace) {
L1[LaplaceCnum][i*8+j] += s*s;
L2[LaplaceCnum][i*8+j] += s;
}
dest[i][j] = (int)floor(s+0.499999);
/*
* reason for adding 0.499999 instead of 0.5:
* s is quite often x.5 (at least for i and/or j = 0 or 4)
* and setting the rounding threshold exactly to 0.5 leads to an
* extremely high arithmetic implementation dependency of the result;
* s being between x.5 and x.500001 (which is now incorrectly rounded
* downwards instead of upwards) is assumed to occur less often
* (if at all)
*/
}
}
|