blob: 0123a13bb46fcebb84317c2407056a7be8ff7c26 [file] [log] [blame]
Nanang Izzuddin57b88572009-04-01 12:05:34 +00001/********************************************************************************
2**
3** ITU-T G.722.1 (2005-05) - Fixed point implementation for main body and Annex C
4** > Software Release 2.1 (2008-06)
5** (Simple repackaging; no change from 2005-05 Release 2.0 code)
6**
7** © 2004 Polycom, Inc.
8**
9** All rights reserved.
10**
11********************************************************************************/
12
13/********************************************************************************
14* Filename: dct_type_iv_s.c
15*
16* Purpose: Discrete Cosine Transform, Type IV used for inverse MLT
17*
18* The basis functions are
19*
20* cos(PI*(t+0.5)*(k+0.5)/block_length)
21*
22* for time t and basis function number k. Due to the symmetry of the expression
23* in t and k, it is clear that the forward and inverse transforms are the same.
24*
25*********************************************************************************/
26
27/***************************************************************************
28 Include files
29***************************************************************************/
30#include "defs.h"
31#include "count.h"
32#include "dct4_s.h"
33
34/***************************************************************************
35 External variable declarations
36***************************************************************************/
37extern Word16 syn_bias_7khz[DCT_LENGTH];
38extern Word16 dither[DCT_LENGTH];
39extern Word16 max_dither[MAX_DCT_LENGTH];
40
41extern Word16 dct_core_s[DCT_LENGTH_DIV_32][DCT_LENGTH_DIV_32];
42extern cos_msin_t s_cos_msin_2[DCT_LENGTH_DIV_32];
43extern cos_msin_t s_cos_msin_4[DCT_LENGTH_DIV_16];
44extern cos_msin_t s_cos_msin_8[DCT_LENGTH_DIV_8];
45extern cos_msin_t s_cos_msin_16[DCT_LENGTH_DIV_4];
46extern cos_msin_t s_cos_msin_32[DCT_LENGTH_DIV_2];
47extern cos_msin_t s_cos_msin_64[DCT_LENGTH];
48extern cos_msin_t *s_cos_msin_table[];
49
50/********************************************************************************
51 Function: dct_type_iv_s
52
53 Syntax: void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
54
55
56 Description: Discrete Cosine Transform, Type IV used for inverse MLT
57
58 Design Notes:
59
60 WMOPS: 7kHz | 24kbit | 32kbit
61 -------|--------------|----------------
62 AVG | 1.74 | 1.74
63 -------|--------------|----------------
64 MAX | 1.74 | 1.74
65 -------|--------------|----------------
66
67 14kHz | 24kbit | 32kbit | 48kbit
68 -------|--------------|----------------|----------------
69 AVG | 3.62 | 3.62 | 3.62
70 -------|--------------|----------------|----------------
71 MAX | 3.62 | 3.62 | 3.62
72 -------|--------------|----------------|----------------
73
74********************************************************************************/
75
76void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
77{
78 Word16 buffer_a[MAX_DCT_LENGTH], buffer_b[MAX_DCT_LENGTH], buffer_c[MAX_DCT_LENGTH];
79 Word16 *in_ptr, *in_ptr_low, *in_ptr_high, *next_in_base;
80 Word16 *out_ptr_low, *out_ptr_high, *next_out_base;
81 Word16 *out_buffer, *in_buffer, *buffer_swap;
82 Word16 in_val_low, in_val_high;
83 Word16 out_val_low, out_val_high;
84 Word16 in_low_even, in_low_odd;
85 Word16 in_high_even, in_high_odd;
86 Word16 out_low_even, out_low_odd;
87 Word16 out_high_even, out_high_odd;
88 Word16 *pair_ptr;
89 Word16 cos_even, cos_odd, msin_even, msin_odd;
90 Word16 set_span, set_count, set_count_log, pairs_left, sets_left;
91 Word16 i,k;
92 Word16 index;
93 Word16 dummy;
94 Word32 sum;
95 cos_msin_t **table_ptr_ptr, *cos_msin_ptr;
96
97 Word32 acca;
98 Word16 temp;
99
100 Word16 dct_length_log;
101 Word16 *dither_ptr;
102
103 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
104 /* Do the sum/difference butterflies, the first part of */
105 /* converting one N-point transform into 32 - 10 point transforms */
106 /* transforms, where N = 1 << DCT_LENGTH_LOG. */
107 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
108 test();
109 if (dct_length==DCT_LENGTH)
110 {
111 dct_length_log = DCT_LENGTH_LOG;
112 move16();
113 dither_ptr = dither;
114 move16();
115 }
116 else
117 {
118 dct_length_log = MAX_DCT_LENGTH_LOG;
119 move16();
120 dither_ptr = max_dither;
121 move16();
122 }
123
124 in_buffer = input;
125 move16();
126 out_buffer = buffer_a;
127 move16();
128
129 index=0;
130 move16();
131
132 i=0;
133 move16();
134
135 for (set_count_log = 0; set_count_log <= dct_length_log - 2; set_count_log++)
136 {
137
138 /*===========================================================*/
139 /* Initialization for the loop over sets at the current size */
140 /*===========================================================*/
141
142 /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */
Benny Prijono3594ab32009-04-18 14:29:28 +0000143 set_span = shr_nocheck(dct_length,set_count_log);
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000144
Benny Prijono3594ab32009-04-18 14:29:28 +0000145 set_count = shl_nocheck(1,set_count_log);
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000146 in_ptr = in_buffer;
147 move16();
148 next_out_base = out_buffer;
149 move16();
150
151 /*=====================================*/
152 /* Loop over all the sets of this size */
153 /*=====================================*/
154 temp = sub(index,1);
155 test();
156 if(temp < 0)
157 {
158 for (sets_left = set_count;sets_left > 0;sets_left--)
159 {
160
161 /*||||||||||||||||||||||||||||||||||||||||||||*/
162 /* Set up output pointers for the current set */
163 /*||||||||||||||||||||||||||||||||||||||||||||*/
164 /* pointer arithmetic */
165 out_ptr_low = next_out_base;
166 move16();
167 next_out_base += set_span;
168 move16();
169 out_ptr_high = next_out_base;
170 move16();
171
172 /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
173 /* Loop over all the butterflies in the current set */
174 /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
175
176 do
177 {
178 in_val_low = *in_ptr++;
179 move16();
180 in_val_high = *in_ptr++;
181 move16();
182
183 /* BEST METHOD OF GETTING RID OF BIAS, BUT COMPUTATIONALLY UNPLEASANT */
184 /* ALTERNATIVE METHOD, SMEARS BIAS OVER THE ENTIRE FRAME, COMPUTATIONALLY SIMPLEST. */
185 /* IF THIS WORKS, IT'S PREFERABLE */
186
187 dummy = add(in_val_low,dither_ptr[i++]);
Benny Prijono3594ab32009-04-18 14:29:28 +0000188 // blp: addition of two 16bits vars, there's no way
189 // they'll overflow a 32bit var
190 //acca = L_add(dummy,in_val_high);
191 acca = dummy + in_val_high;
192 out_val_low = extract_l(L_shr_nocheck(acca,1));
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000193
194 dummy = add(in_val_low,dither_ptr[i++]);
Benny Prijono3594ab32009-04-18 14:29:28 +0000195 // blp: addition of two 16bits vars, there's no way
196 // they'll overflow a 32bit var
197 //acca = L_add(dummy,-in_val_high);
198 acca = dummy - in_val_high;
199 out_val_high = extract_l(L_shr_nocheck(acca,1));
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000200
201 *out_ptr_low++ = out_val_low;
202 move16();
203 *--out_ptr_high = out_val_high;
204 move16();
205
206 test();
207
208 /* this involves comparison of pointers */
209 /* pointer arithmetic */
210
211 } while (out_ptr_low < out_ptr_high);
212
213 } /* End of loop over sets of the current size */
214 }
215 else
216 {
217 for (sets_left = set_count; sets_left > 0; sets_left--)
218 {
219 /*||||||||||||||||||||||||||||||||||||||||||||*/
220 /* Set up output pointers for the current set */
221 /*||||||||||||||||||||||||||||||||||||||||||||*/
222
223 out_ptr_low = next_out_base;
224 move16();
225 next_out_base += set_span;
226 move16();
227 out_ptr_high = next_out_base;
228 move16();
229
230 /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
231 /* Loop over all the butterflies in the current set */
232 /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
233
234 do
235 {
236 in_val_low = *in_ptr++;
237 move16();
238 in_val_high = *in_ptr++;
239 move16();
240
241 out_val_low = add(in_val_low,in_val_high);
242 out_val_high = add(in_val_low,negate(in_val_high));
243
244 *out_ptr_low++ = out_val_low;
245 move16();
246 *--out_ptr_high = out_val_high;
247 move16();
248
249 test();
250 } while (out_ptr_low < out_ptr_high);
251
252 } /* End of loop over sets of the current size */
253 }
254
255 /*============================================================*/
256 /* Decide which buffers to use as input and output next time. */
257 /* Except for the first time (when the input buffer is the */
258 /* subroutine input) we just alternate the local buffers. */
259 /*============================================================*/
260
261 in_buffer = out_buffer;
262 move16();
263
264 test();
265 if (out_buffer == buffer_a)
266 {
267 out_buffer = buffer_b;
268 move16();
269 }
270 else
271 {
272 out_buffer = buffer_a;
273 move16();
274 }
275
276 index = add(index,1);
277 } /* End of loop over set sizes */
278
279
280 /*++++++++++++++++++++++++++++++++*/
281 /* Do 32 - 10 point transforms */
282 /*++++++++++++++++++++++++++++++++*/
283
284 pair_ptr = in_buffer;
285 move16();
286 buffer_swap = buffer_c;
287 move16();
288
289 for (pairs_left = 1 << (dct_length_log - 1); pairs_left > 0; pairs_left--)
290 {
291 for ( k=0; k<CORE_SIZE; k++ )
292 {
Benny Prijono3594ab32009-04-18 14:29:28 +0000293#if PJ_HAS_INT64
294 /* blp: danger danger! not really compatible but faster */
295 pj_int64_t sum64=0;
296 move32();
297
298 for ( i=0; i<CORE_SIZE; i++ )
299 {
300 sum64 += L_mult(pair_ptr[i], dct_core_s[i][k]);
301 }
302 sum = L_saturate(sum64);
303#else
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000304 sum=0L;
305 move32();
306
307 for ( i=0; i<CORE_SIZE; i++ )
308 {
309 sum = L_mac(sum, pair_ptr[i],dct_core_s[i][k]);
310 }
Benny Prijono3594ab32009-04-18 14:29:28 +0000311#endif
Nanang Izzuddin56e380a2009-04-15 14:45:41 +0000312 buffer_swap[k] = itu_round(sum);
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000313 }
314
315 pair_ptr += CORE_SIZE;
316 move16();
317 buffer_swap += CORE_SIZE;
318 move16();
319 }
320
321 for (i=0;i<dct_length;i++)
322 {
323 in_buffer[i] = buffer_c[i];
324 move16();
325 }
326
327 table_ptr_ptr = s_cos_msin_table;
328 move16();
329
330 /*++++++++++++++++++++++++++++++*/
331 /* Perform rotation butterflies */
332 /*++++++++++++++++++++++++++++++*/
333 index=0;
334 move16();
335
336 for (set_count_log = dct_length_log - 2 ; set_count_log >= 0; set_count_log--)
337 {
338
339 /*===========================================================*/
340 /* Initialization for the loop over sets at the current size */
341 /*===========================================================*/
342
343 /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */
Benny Prijono3594ab32009-04-18 14:29:28 +0000344 set_span = shr_nocheck(dct_length,set_count_log);
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000345
Benny Prijono3594ab32009-04-18 14:29:28 +0000346 set_count = shl_nocheck(1,set_count_log);
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000347 next_in_base = in_buffer;
348 move16();
349 test();
350 if (set_count_log == 0)
351 {
352 next_out_base = output;
353 move16();
354 }
355 else
356 {
357 next_out_base = out_buffer;
358 move16();
359 }
360
361 /*=====================================*/
362 /* Loop over all the sets of this size */
363 /*=====================================*/
364
365 for (sets_left = set_count; sets_left > 0; sets_left--)
366 {
367
368 /*|||||||||||||||||||||||||||||||||||||||||*/
369 /* Set up the pointers for the current set */
370 /*|||||||||||||||||||||||||||||||||||||||||*/
371
372 in_ptr_low = next_in_base;
373 move16();
374
Benny Prijono3594ab32009-04-18 14:29:28 +0000375 temp = shr_nocheck(set_span,1);
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000376 in_ptr_high = in_ptr_low + temp;
377 move16();
378
379 next_in_base += set_span;
380 move16();
381
382 out_ptr_low = next_out_base;
383 move16();
384
385 next_out_base += set_span;
386 move16();
387 out_ptr_high = next_out_base;
388 move16();
389
390 cos_msin_ptr = *table_ptr_ptr;
391 move16();
392
393 /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/
394 /* Loop over all the butterfly pairs in the current set */
395 /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/
396
397 do
398 {
399 in_low_even = *in_ptr_low++;
400 move16();
401 in_low_odd = *in_ptr_low++;
402 move16();
403 in_high_even = *in_ptr_high++;
404 move16();
405 in_high_odd = *in_ptr_high++;
406 move16();
407 cos_even = cos_msin_ptr[0].cosine;
408 move16();
409 msin_even = cos_msin_ptr[0].minus_sine;
410 move16();
411 cos_odd = cos_msin_ptr[1].cosine;
412 move16();
413 msin_odd = cos_msin_ptr[1].minus_sine;
414 move16();
415 cos_msin_ptr += 2;
416
417 sum = 0L;
418 move32();
419
420 sum = L_mac(sum,cos_even,in_low_even);
421 sum = L_mac(sum,negate(msin_even),in_high_even);
Benny Prijono3594ab32009-04-18 14:29:28 +0000422 out_low_even = itu_round(L_shl_nocheck(sum,1));
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000423
424 sum = 0L;
425 move32();
426 sum = L_mac(sum,msin_even,in_low_even);
427 sum = L_mac(sum,cos_even,in_high_even);
Benny Prijono3594ab32009-04-18 14:29:28 +0000428 out_high_even = itu_round(L_shl_nocheck(sum,1));
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000429
430 sum = 0L;
431 move32();
432 sum = L_mac(sum,cos_odd,in_low_odd);
433 sum = L_mac(sum,msin_odd,in_high_odd);
Benny Prijono3594ab32009-04-18 14:29:28 +0000434 out_low_odd = itu_round(L_shl_nocheck(sum,1));
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000435
436 sum = 0L;
437 move32();
438 sum = L_mac(sum,msin_odd,in_low_odd);
439 sum = L_mac(sum,negate(cos_odd),in_high_odd);
Benny Prijono3594ab32009-04-18 14:29:28 +0000440 out_high_odd = itu_round(L_shl_nocheck(sum,1));
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000441
442 *out_ptr_low++ = out_low_even;
443 move16();
444 *--out_ptr_high = out_high_even;
445 move16();
446 *out_ptr_low++ = out_low_odd;
447 move16();
448 *--out_ptr_high = out_high_odd;
449 move16();
450
451 test();
452 } while (out_ptr_low < out_ptr_high);
453
454 } /* End of loop over sets of the current size */
455
456 /*=============================================*/
457 /* Swap input and output buffers for next time */
458 /*=============================================*/
459
460 buffer_swap = in_buffer;
461 move16();
462 in_buffer = out_buffer;
463 move16();
464 out_buffer = buffer_swap;
465 move16();
466
467 index = add(index,1);
468 table_ptr_ptr++;
469 }
470 /*------------------------------------
471
472 ADD IN BIAS FOR OUTPUT
473
474 -----------------------------------*/
475 if (dct_length==DCT_LENGTH)
476 {
477 for(i=0;i<320;i++)
478 {
Benny Prijono3594ab32009-04-18 14:29:28 +0000479 // blp: addition of two 16bits vars, there's no way
480 // they'll overflow a 32bit var
481 //sum = L_add(output[i],syn_bias_7khz[i]);
482 sum = output[i] + syn_bias_7khz[i];
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000483 acca = L_sub(sum,32767);
484 test();
485 if (acca > 0)
486 {
487 sum = 32767L;
488 move32();
489 }
Benny Prijono3594ab32009-04-18 14:29:28 +0000490 // blp: addition of two 16bits vars, there's no way
491 // they'll overflow 32bit var
492 //acca = L_add(sum,32768L);
493 acca = sum + 32768;
Nanang Izzuddin57b88572009-04-01 12:05:34 +0000494 test();
495 if (acca < 0)
496 {
497 sum = -32768L;
498 move32();
499 }
500 output[i] = extract_l(sum);
501 }
502 }
503}
504