blob: 94ddf9fe4917e6e68ee8ff5a4466f9e4cb1b8955 [file] [log] [blame]
Alexandre Lision67916dd2014-01-24 13:33:04 -05001/*********************************************************************************
2** ITU-T G.722.1 (2005-05) - Fixed point implementation for main body and Annex C
3** > Software Release 2.1 (2008-06)
4** (Simple repackaging; no change from 2005-05 Release 2.0 code)
5**
6** © 2004 Polycom, Inc.
7**
8** All rights reserved.
9**
10*********************************************************************************/
11
12/*********************************************************************************
13* Filename: dct_type_iv_a.c
14*
15* Purpose: Discrete Cosine Transform, Type IV used for MLT
16*
17* The basis functions are
18*
19* cos(PI*(t+0.5)*(k+0.5)/block_length)
20*
21* for time t and basis function number k. Due to the symmetry of the expression
22* in t and k, it is clear that the forward and inverse transforms are the same.
23*
24*********************************************************************************/
25
26/*********************************************************************************
27 Include files
28*********************************************************************************/
29#include "defs.h"
30#include "count.h"
31#include "dct4_a.h"
32
33/*********************************************************************************
34 External variable declarations
35*********************************************************************************/
36extern Word16 anal_bias[DCT_LENGTH];
37extern Word16 dct_core_a[DCT_LENGTH_DIV_32][DCT_LENGTH_DIV_32];
38extern cos_msin_t a_cos_msin_2 [DCT_LENGTH_DIV_32];
39extern cos_msin_t a_cos_msin_4 [DCT_LENGTH_DIV_16];
40extern cos_msin_t a_cos_msin_8 [DCT_LENGTH_DIV_8];
41extern cos_msin_t a_cos_msin_16[DCT_LENGTH_DIV_4];
42extern cos_msin_t a_cos_msin_32[DCT_LENGTH_DIV_2];
43extern cos_msin_t a_cos_msin_64[DCT_LENGTH];
44extern cos_msin_t *a_cos_msin_table[];
45
46/*********************************************************************************
47 Function: dct_type_iv_a
48
49 Syntax: void dct_type_iv_a (input, output, dct_length)
50 Word16 input[], output[], dct_length;
51
52 Description: Discrete Cosine Transform, Type IV used for MLT
53
54 Design Notes:
55
56 WMOPS: | 24kbit | 32kbit
57 -------|--------------|----------------
58 AVG | 1.14 | 1.14
59 -------|--------------|----------------
60 MAX | 1.14 | 1.14
61 -------|--------------|----------------
62
63 14kHz | 24kbit | 32kbit | 48kbit
64 -------|--------------|----------------|----------------
65 AVG | 2.57 | 2.57 | 2.57
66 -------|--------------|----------------|----------------
67 MAX | 2.57 | 2.57 | 2.57
68 -------|--------------|----------------|----------------
69
70*********************************************************************************/
71
72void dct_type_iv_a (Word16 *input,Word16 *output,Word16 dct_length)
73{
74 Word16 buffer_a[MAX_DCT_LENGTH], buffer_b[MAX_DCT_LENGTH], buffer_c[MAX_DCT_LENGTH];
75 Word16 *in_ptr, *in_ptr_low, *in_ptr_high, *next_in_base;
76 Word16 *out_ptr_low, *out_ptr_high, *next_out_base;
77 Word16 *out_buffer, *in_buffer, *buffer_swap;
78 Word16 in_val_low, in_val_high;
79 Word16 out_val_low, out_val_high;
80 Word16 in_low_even, in_low_odd;
81 Word16 in_high_even, in_high_odd;
82 Word16 out_low_even, out_low_odd;
83 Word16 out_high_even, out_high_odd;
84 Word16 *pair_ptr;
85 Word16 cos_even, cos_odd, msin_even, msin_odd;
86 Word16 neg_cos_odd;
87 Word16 neg_msin_even;
88 Word32 sum;
89 Word16 set_span, set_count, set_count_log, pairs_left, sets_left;
90 Word16 i,k;
91 Word16 index;
92 cos_msin_t **table_ptr_ptr, *cos_msin_ptr;
93
94 Word16 temp;
95 Word32 acca;
96
97 Word16 dct_length_log;
98
99
100 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
101 /* Do the sum/difference butterflies, the first part of */
102 /* converting one N-point transform into N/2 two-point */
103 /* transforms, where N = 1 << DCT_LENGTH_LOG. = 64/128 */
104 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
105 test();
106 if (dct_length==DCT_LENGTH)
107 {
108 dct_length_log = DCT_LENGTH_LOG;
109
110 /* Add bias offsets */
111 for (i=0;i<dct_length;i++)
112 {
113 input[i] = add(input[i],anal_bias[i]);
114 move16();
115 }
116 }
117 else
118 dct_length_log = MAX_DCT_LENGTH_LOG;
119
120 index = 0L;
121 move16();
122
123 in_buffer = input;
124 move16();
125
126 out_buffer = buffer_a;
127 move16();
128
129 temp = sub(dct_length_log,2);
130 for (set_count_log=0;set_count_log<=temp;set_count_log++)
131 {
132
133 /*===========================================================*/
134 /* Initialization for the loop over sets at the current size */
135 /*===========================================================*/
136
137 /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */
138 set_span = shr_nocheck(dct_length,set_count_log);
139
140 set_count = shl_nocheck(1,set_count_log);
141
142 in_ptr = in_buffer;
143 move16();
144
145 next_out_base = out_buffer;
146 move16();
147
148 /*=====================================*/
149 /* Loop over all the sets of this size */
150 /*=====================================*/
151
152 for (sets_left=set_count;sets_left>0;sets_left--)
153 {
154
155 /*||||||||||||||||||||||||||||||||||||||||||||*/
156 /* Set up output pointers for the current set */
157 /*||||||||||||||||||||||||||||||||||||||||||||*/
158
159 out_ptr_low = next_out_base;
160 next_out_base = next_out_base + set_span;
161 out_ptr_high = next_out_base;
162
163 /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
164 /* Loop over all the butterflies in the current set */
165 /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
166
167 do
168 {
169 in_val_low = *in_ptr++;
170 in_val_high = *in_ptr++;
171 // blp: addition of two 16bits vars, there's no way
172 // they'll overflow a 32bit var
173 //acca = L_add(in_val_low,in_val_high);
174 acca = (in_val_low + in_val_high);
175 acca = L_shr_nocheck(acca,1);
176 out_val_low = extract_l(acca);
177
178 acca = L_sub(in_val_low,in_val_high);
179 acca = L_shr_nocheck(acca,1);
180 out_val_high = extract_l(acca);
181
182 *out_ptr_low++ = out_val_low;
183 *--out_ptr_high = out_val_high;
184
185 test();
186 } while (out_ptr_low < out_ptr_high);
187
188 } /* End of loop over sets of the current size */
189
190 /*============================================================*/
191 /* Decide which buffers to use as input and output next time. */
192 /* Except for the first time (when the input buffer is the */
193 /* subroutine input) we just alternate the local buffers. */
194 /*============================================================*/
195
196 in_buffer = out_buffer;
197 move16();
198 if (out_buffer == buffer_a)
199 out_buffer = buffer_b;
200 else
201 out_buffer = buffer_a;
202 index = add(index,1);
203
204 } /* End of loop over set sizes */
205
206
207 /*++++++++++++++++++++++++++++++++*/
208 /* Do N/2 two-point transforms, */
209 /* where N = 1 << DCT_LENGTH_LOG */
210 /*++++++++++++++++++++++++++++++++*/
211
212 pair_ptr = in_buffer;
213 move16();
214
215 buffer_swap = buffer_c;
216 move16();
217
218 temp = sub(dct_length_log,1);
219 temp = shl_nocheck(1,temp);
220
221 for (pairs_left=temp; pairs_left > 0; pairs_left--)
222 {
223 for ( k=0; k<CORE_SIZE; k++ )
224 {
225#if PJ_HAS_INT64
226 /* blp: danger danger! not really compatible but faster */
227 pj_int64_t sum64=0;
228 move32();
229
230 for ( i=0; i<CORE_SIZE; i++ )
231 {
232 sum64 += L_mult(pair_ptr[i], dct_core_a[i][k]);
233 }
234 sum = L_saturate(sum64);
235#else
236 sum=0L;
237 move32();
238 for ( i=0; i<CORE_SIZE; i++ )
239 {
240 sum = L_mac(sum, pair_ptr[i],dct_core_a[i][k]);
241 }
242#endif
243 buffer_swap[k] = itu_round(sum);
244 }
245 /* address arithmetic */
246 pair_ptr += CORE_SIZE;
247 buffer_swap += CORE_SIZE;
248 }
249
250 for (i=0;i<dct_length;i++)
251 {
252 in_buffer[i] = buffer_c[i];
253 move16();
254 }
255
256 table_ptr_ptr = a_cos_msin_table;
257
258 /*++++++++++++++++++++++++++++++*/
259 /* Perform rotation butterflies */
260 /*++++++++++++++++++++++++++++++*/
261 temp = sub(dct_length_log,2);
262 for (set_count_log = temp; set_count_log >= 0; set_count_log--)
263 {
264 /*===========================================================*/
265 /* Initialization for the loop over sets at the current size */
266 /*===========================================================*/
267 /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */
268 set_span = shr_nocheck(dct_length,set_count_log);
269
270 set_count = shl_nocheck(1,set_count_log);
271 next_in_base = in_buffer;
272 move16();
273
274 test();
275 if (set_count_log == 0)
276 {
277 next_out_base = output;
278 }
279 else
280 {
281 next_out_base = out_buffer;
282 }
283
284
285 /*=====================================*/
286 /* Loop over all the sets of this size */
287 /*=====================================*/
288 for (sets_left = set_count; sets_left > 0;sets_left--)
289 {
290 /*|||||||||||||||||||||||||||||||||||||||||*/
291 /* Set up the pointers for the current set */
292 /*|||||||||||||||||||||||||||||||||||||||||*/
293 in_ptr_low = next_in_base;
294 move16();
295 temp = shr_nocheck(set_span,1);
296
297 /* address arithmetic */
298 in_ptr_high = in_ptr_low + temp;
299 next_in_base += set_span;
300 out_ptr_low = next_out_base;
301 next_out_base += set_span;
302 out_ptr_high = next_out_base;
303 cos_msin_ptr = *table_ptr_ptr;
304
305 /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/
306 /* Loop over all the butterfly pairs in the current set */
307 /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/
308
309 do
310 {
311 /* address arithmetic */
312 in_low_even = *in_ptr_low++;
313 in_low_odd = *in_ptr_low++;
314 in_high_even = *in_ptr_high++;
315 in_high_odd = *in_ptr_high++;
316 cos_even = cos_msin_ptr[0].cosine;
317 move16();
318 msin_even = cos_msin_ptr[0].minus_sine;
319 move16();
320 cos_odd = cos_msin_ptr[1].cosine;
321 move16();
322 msin_odd = cos_msin_ptr[1].minus_sine;
323 move16();
324 cos_msin_ptr += 2;
325
326 sum = 0L;
327 sum=L_mac(sum,cos_even,in_low_even);
328 neg_msin_even = negate(msin_even);
329 sum=L_mac(sum,neg_msin_even,in_high_even);
330 out_low_even = itu_round(sum);
331
332 sum = 0L;
333 sum=L_mac(sum,msin_even,in_low_even);
334 sum=L_mac(sum,cos_even,in_high_even);
335 out_high_even= itu_round(sum);
336
337 sum = 0L;
338 sum=L_mac(sum,cos_odd,in_low_odd);
339 sum=L_mac(sum,msin_odd,in_high_odd);
340 out_low_odd= itu_round(sum);
341
342 sum = 0L;
343 sum=L_mac(sum,msin_odd,in_low_odd);
344 neg_cos_odd = negate(cos_odd);
345 sum=L_mac(sum,neg_cos_odd,in_high_odd);
346 out_high_odd= itu_round(sum);
347
348 *out_ptr_low++ = out_low_even;
349 *--out_ptr_high = out_high_even;
350 *out_ptr_low++ = out_low_odd;
351 *--out_ptr_high = out_high_odd;
352 test();
353 } while (out_ptr_low < out_ptr_high);
354
355 } /* End of loop over sets of the current size */
356
357 /*=============================================*/
358 /* Swap input and output buffers for next time */
359 /*=============================================*/
360
361 buffer_swap = in_buffer;
362 in_buffer = out_buffer;
363 out_buffer = buffer_swap;
364 table_ptr_ptr++;
365 }
366}
367