Tristan Matthews | 0a329cc | 2013-07-17 13:20:14 -0400 | [diff] [blame] | 1 | /********************************************************************************* |
| 2 | ** ITU-T G.722.1 (2005-05) - Fixed point implementation for main body and Annex C |
| 3 | ** > Software Release 2.1 (2008-06) |
| 4 | ** (Simple repackaging; no change from 2005-05 Release 2.0 code) |
| 5 | ** |
| 6 | ** © 2004 Polycom, Inc. |
| 7 | ** |
| 8 | ** All rights reserved. |
| 9 | ** |
| 10 | *********************************************************************************/ |
| 11 | |
| 12 | /********************************************************************************* |
| 13 | * Filename: dct_type_iv_a.c |
| 14 | * |
| 15 | * Purpose: Discrete Cosine Transform, Type IV used for MLT |
| 16 | * |
| 17 | * The basis functions are |
| 18 | * |
| 19 | * cos(PI*(t+0.5)*(k+0.5)/block_length) |
| 20 | * |
| 21 | * for time t and basis function number k. Due to the symmetry of the expression |
| 22 | * in t and k, it is clear that the forward and inverse transforms are the same. |
| 23 | * |
| 24 | *********************************************************************************/ |
| 25 | |
| 26 | /********************************************************************************* |
| 27 | Include files |
| 28 | *********************************************************************************/ |
| 29 | #include "defs.h" |
| 30 | #include "count.h" |
| 31 | #include "dct4_a.h" |
| 32 | |
| 33 | /********************************************************************************* |
| 34 | External variable declarations |
| 35 | *********************************************************************************/ |
| 36 | extern Word16 anal_bias[DCT_LENGTH]; |
| 37 | extern Word16 dct_core_a[DCT_LENGTH_DIV_32][DCT_LENGTH_DIV_32]; |
| 38 | extern cos_msin_t a_cos_msin_2 [DCT_LENGTH_DIV_32]; |
| 39 | extern cos_msin_t a_cos_msin_4 [DCT_LENGTH_DIV_16]; |
| 40 | extern cos_msin_t a_cos_msin_8 [DCT_LENGTH_DIV_8]; |
| 41 | extern cos_msin_t a_cos_msin_16[DCT_LENGTH_DIV_4]; |
| 42 | extern cos_msin_t a_cos_msin_32[DCT_LENGTH_DIV_2]; |
| 43 | extern cos_msin_t a_cos_msin_64[DCT_LENGTH]; |
| 44 | extern cos_msin_t *a_cos_msin_table[]; |
| 45 | |
| 46 | /********************************************************************************* |
| 47 | Function: dct_type_iv_a |
| 48 | |
| 49 | Syntax: void dct_type_iv_a (input, output, dct_length) |
| 50 | Word16 input[], output[], dct_length; |
| 51 | |
| 52 | Description: Discrete Cosine Transform, Type IV used for MLT |
| 53 | |
| 54 | Design Notes: |
| 55 | |
| 56 | WMOPS: | 24kbit | 32kbit |
| 57 | -------|--------------|---------------- |
| 58 | AVG | 1.14 | 1.14 |
| 59 | -------|--------------|---------------- |
| 60 | MAX | 1.14 | 1.14 |
| 61 | -------|--------------|---------------- |
| 62 | |
| 63 | 14kHz | 24kbit | 32kbit | 48kbit |
| 64 | -------|--------------|----------------|---------------- |
| 65 | AVG | 2.57 | 2.57 | 2.57 |
| 66 | -------|--------------|----------------|---------------- |
| 67 | MAX | 2.57 | 2.57 | 2.57 |
| 68 | -------|--------------|----------------|---------------- |
| 69 | |
| 70 | *********************************************************************************/ |
| 71 | |
| 72 | void dct_type_iv_a (Word16 *input,Word16 *output,Word16 dct_length) |
| 73 | { |
| 74 | Word16 buffer_a[MAX_DCT_LENGTH], buffer_b[MAX_DCT_LENGTH], buffer_c[MAX_DCT_LENGTH]; |
| 75 | Word16 *in_ptr, *in_ptr_low, *in_ptr_high, *next_in_base; |
| 76 | Word16 *out_ptr_low, *out_ptr_high, *next_out_base; |
| 77 | Word16 *out_buffer, *in_buffer, *buffer_swap; |
| 78 | Word16 in_val_low, in_val_high; |
| 79 | Word16 out_val_low, out_val_high; |
| 80 | Word16 in_low_even, in_low_odd; |
| 81 | Word16 in_high_even, in_high_odd; |
| 82 | Word16 out_low_even, out_low_odd; |
| 83 | Word16 out_high_even, out_high_odd; |
| 84 | Word16 *pair_ptr; |
| 85 | Word16 cos_even, cos_odd, msin_even, msin_odd; |
| 86 | Word16 neg_cos_odd; |
| 87 | Word16 neg_msin_even; |
| 88 | Word32 sum; |
| 89 | Word16 set_span, set_count, set_count_log, pairs_left, sets_left; |
| 90 | Word16 i,k; |
| 91 | Word16 index; |
| 92 | cos_msin_t **table_ptr_ptr, *cos_msin_ptr; |
| 93 | |
| 94 | Word16 temp; |
| 95 | Word32 acca; |
| 96 | |
| 97 | Word16 dct_length_log; |
| 98 | |
| 99 | |
| 100 | /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ |
| 101 | /* Do the sum/difference butterflies, the first part of */ |
| 102 | /* converting one N-point transform into N/2 two-point */ |
| 103 | /* transforms, where N = 1 << DCT_LENGTH_LOG. = 64/128 */ |
| 104 | /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ |
| 105 | test(); |
| 106 | if (dct_length==DCT_LENGTH) |
| 107 | { |
| 108 | dct_length_log = DCT_LENGTH_LOG; |
| 109 | |
| 110 | /* Add bias offsets */ |
| 111 | for (i=0;i<dct_length;i++) |
| 112 | { |
| 113 | input[i] = add(input[i],anal_bias[i]); |
| 114 | move16(); |
| 115 | } |
| 116 | } |
| 117 | else |
| 118 | dct_length_log = MAX_DCT_LENGTH_LOG; |
| 119 | |
| 120 | index = 0L; |
| 121 | move16(); |
| 122 | |
| 123 | in_buffer = input; |
| 124 | move16(); |
| 125 | |
| 126 | out_buffer = buffer_a; |
| 127 | move16(); |
| 128 | |
| 129 | temp = sub(dct_length_log,2); |
| 130 | for (set_count_log=0;set_count_log<=temp;set_count_log++) |
| 131 | { |
| 132 | |
| 133 | /*===========================================================*/ |
| 134 | /* Initialization for the loop over sets at the current size */ |
| 135 | /*===========================================================*/ |
| 136 | |
| 137 | /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */ |
| 138 | set_span = shr_nocheck(dct_length,set_count_log); |
| 139 | |
| 140 | set_count = shl_nocheck(1,set_count_log); |
| 141 | |
| 142 | in_ptr = in_buffer; |
| 143 | move16(); |
| 144 | |
| 145 | next_out_base = out_buffer; |
| 146 | move16(); |
| 147 | |
| 148 | /*=====================================*/ |
| 149 | /* Loop over all the sets of this size */ |
| 150 | /*=====================================*/ |
| 151 | |
| 152 | for (sets_left=set_count;sets_left>0;sets_left--) |
| 153 | { |
| 154 | |
| 155 | /*||||||||||||||||||||||||||||||||||||||||||||*/ |
| 156 | /* Set up output pointers for the current set */ |
| 157 | /*||||||||||||||||||||||||||||||||||||||||||||*/ |
| 158 | |
| 159 | out_ptr_low = next_out_base; |
| 160 | next_out_base = next_out_base + set_span; |
| 161 | out_ptr_high = next_out_base; |
| 162 | |
| 163 | /*||||||||||||||||||||||||||||||||||||||||||||||||||*/ |
| 164 | /* Loop over all the butterflies in the current set */ |
| 165 | /*||||||||||||||||||||||||||||||||||||||||||||||||||*/ |
| 166 | |
| 167 | do |
| 168 | { |
| 169 | in_val_low = *in_ptr++; |
| 170 | in_val_high = *in_ptr++; |
| 171 | // blp: addition of two 16bits vars, there's no way |
| 172 | // they'll overflow a 32bit var |
| 173 | //acca = L_add(in_val_low,in_val_high); |
| 174 | acca = (in_val_low + in_val_high); |
| 175 | acca = L_shr_nocheck(acca,1); |
| 176 | out_val_low = extract_l(acca); |
| 177 | |
| 178 | acca = L_sub(in_val_low,in_val_high); |
| 179 | acca = L_shr_nocheck(acca,1); |
| 180 | out_val_high = extract_l(acca); |
| 181 | |
| 182 | *out_ptr_low++ = out_val_low; |
| 183 | *--out_ptr_high = out_val_high; |
| 184 | |
| 185 | test(); |
| 186 | } while (out_ptr_low < out_ptr_high); |
| 187 | |
| 188 | } /* End of loop over sets of the current size */ |
| 189 | |
| 190 | /*============================================================*/ |
| 191 | /* Decide which buffers to use as input and output next time. */ |
| 192 | /* Except for the first time (when the input buffer is the */ |
| 193 | /* subroutine input) we just alternate the local buffers. */ |
| 194 | /*============================================================*/ |
| 195 | |
| 196 | in_buffer = out_buffer; |
| 197 | move16(); |
| 198 | if (out_buffer == buffer_a) |
| 199 | out_buffer = buffer_b; |
| 200 | else |
| 201 | out_buffer = buffer_a; |
| 202 | index = add(index,1); |
| 203 | |
| 204 | } /* End of loop over set sizes */ |
| 205 | |
| 206 | |
| 207 | /*++++++++++++++++++++++++++++++++*/ |
| 208 | /* Do N/2 two-point transforms, */ |
| 209 | /* where N = 1 << DCT_LENGTH_LOG */ |
| 210 | /*++++++++++++++++++++++++++++++++*/ |
| 211 | |
| 212 | pair_ptr = in_buffer; |
| 213 | move16(); |
| 214 | |
| 215 | buffer_swap = buffer_c; |
| 216 | move16(); |
| 217 | |
| 218 | temp = sub(dct_length_log,1); |
| 219 | temp = shl_nocheck(1,temp); |
| 220 | |
| 221 | for (pairs_left=temp; pairs_left > 0; pairs_left--) |
| 222 | { |
| 223 | for ( k=0; k<CORE_SIZE; k++ ) |
| 224 | { |
| 225 | #if PJ_HAS_INT64 |
| 226 | /* blp: danger danger! not really compatible but faster */ |
| 227 | pj_int64_t sum64=0; |
| 228 | move32(); |
| 229 | |
| 230 | for ( i=0; i<CORE_SIZE; i++ ) |
| 231 | { |
| 232 | sum64 += L_mult(pair_ptr[i], dct_core_a[i][k]); |
| 233 | } |
| 234 | sum = L_saturate(sum64); |
| 235 | #else |
| 236 | sum=0L; |
| 237 | move32(); |
| 238 | for ( i=0; i<CORE_SIZE; i++ ) |
| 239 | { |
| 240 | sum = L_mac(sum, pair_ptr[i],dct_core_a[i][k]); |
| 241 | } |
| 242 | #endif |
| 243 | buffer_swap[k] = itu_round(sum); |
| 244 | } |
| 245 | /* address arithmetic */ |
| 246 | pair_ptr += CORE_SIZE; |
| 247 | buffer_swap += CORE_SIZE; |
| 248 | } |
| 249 | |
| 250 | for (i=0;i<dct_length;i++) |
| 251 | { |
| 252 | in_buffer[i] = buffer_c[i]; |
| 253 | move16(); |
| 254 | } |
| 255 | |
| 256 | table_ptr_ptr = a_cos_msin_table; |
| 257 | |
| 258 | /*++++++++++++++++++++++++++++++*/ |
| 259 | /* Perform rotation butterflies */ |
| 260 | /*++++++++++++++++++++++++++++++*/ |
| 261 | temp = sub(dct_length_log,2); |
| 262 | for (set_count_log = temp; set_count_log >= 0; set_count_log--) |
| 263 | { |
| 264 | /*===========================================================*/ |
| 265 | /* Initialization for the loop over sets at the current size */ |
| 266 | /*===========================================================*/ |
| 267 | /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */ |
| 268 | set_span = shr_nocheck(dct_length,set_count_log); |
| 269 | |
| 270 | set_count = shl_nocheck(1,set_count_log); |
| 271 | next_in_base = in_buffer; |
| 272 | move16(); |
| 273 | |
| 274 | test(); |
| 275 | if (set_count_log == 0) |
| 276 | { |
| 277 | next_out_base = output; |
| 278 | } |
| 279 | else |
| 280 | { |
| 281 | next_out_base = out_buffer; |
| 282 | } |
| 283 | |
| 284 | |
| 285 | /*=====================================*/ |
| 286 | /* Loop over all the sets of this size */ |
| 287 | /*=====================================*/ |
| 288 | for (sets_left = set_count; sets_left > 0;sets_left--) |
| 289 | { |
| 290 | /*|||||||||||||||||||||||||||||||||||||||||*/ |
| 291 | /* Set up the pointers for the current set */ |
| 292 | /*|||||||||||||||||||||||||||||||||||||||||*/ |
| 293 | in_ptr_low = next_in_base; |
| 294 | move16(); |
| 295 | temp = shr_nocheck(set_span,1); |
| 296 | |
| 297 | /* address arithmetic */ |
| 298 | in_ptr_high = in_ptr_low + temp; |
| 299 | next_in_base += set_span; |
| 300 | out_ptr_low = next_out_base; |
| 301 | next_out_base += set_span; |
| 302 | out_ptr_high = next_out_base; |
| 303 | cos_msin_ptr = *table_ptr_ptr; |
| 304 | |
| 305 | /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/ |
| 306 | /* Loop over all the butterfly pairs in the current set */ |
| 307 | /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/ |
| 308 | |
| 309 | do |
| 310 | { |
| 311 | /* address arithmetic */ |
| 312 | in_low_even = *in_ptr_low++; |
| 313 | in_low_odd = *in_ptr_low++; |
| 314 | in_high_even = *in_ptr_high++; |
| 315 | in_high_odd = *in_ptr_high++; |
| 316 | cos_even = cos_msin_ptr[0].cosine; |
| 317 | move16(); |
| 318 | msin_even = cos_msin_ptr[0].minus_sine; |
| 319 | move16(); |
| 320 | cos_odd = cos_msin_ptr[1].cosine; |
| 321 | move16(); |
| 322 | msin_odd = cos_msin_ptr[1].minus_sine; |
| 323 | move16(); |
| 324 | cos_msin_ptr += 2; |
| 325 | |
| 326 | sum = 0L; |
| 327 | sum=L_mac(sum,cos_even,in_low_even); |
| 328 | neg_msin_even = negate(msin_even); |
| 329 | sum=L_mac(sum,neg_msin_even,in_high_even); |
| 330 | out_low_even = itu_round(sum); |
| 331 | |
| 332 | sum = 0L; |
| 333 | sum=L_mac(sum,msin_even,in_low_even); |
| 334 | sum=L_mac(sum,cos_even,in_high_even); |
| 335 | out_high_even= itu_round(sum); |
| 336 | |
| 337 | sum = 0L; |
| 338 | sum=L_mac(sum,cos_odd,in_low_odd); |
| 339 | sum=L_mac(sum,msin_odd,in_high_odd); |
| 340 | out_low_odd= itu_round(sum); |
| 341 | |
| 342 | sum = 0L; |
| 343 | sum=L_mac(sum,msin_odd,in_low_odd); |
| 344 | neg_cos_odd = negate(cos_odd); |
| 345 | sum=L_mac(sum,neg_cos_odd,in_high_odd); |
| 346 | out_high_odd= itu_round(sum); |
| 347 | |
| 348 | *out_ptr_low++ = out_low_even; |
| 349 | *--out_ptr_high = out_high_even; |
| 350 | *out_ptr_low++ = out_low_odd; |
| 351 | *--out_ptr_high = out_high_odd; |
| 352 | test(); |
| 353 | } while (out_ptr_low < out_ptr_high); |
| 354 | |
| 355 | } /* End of loop over sets of the current size */ |
| 356 | |
| 357 | /*=============================================*/ |
| 358 | /* Swap input and output buffers for next time */ |
| 359 | /*=============================================*/ |
| 360 | |
| 361 | buffer_swap = in_buffer; |
| 362 | in_buffer = out_buffer; |
| 363 | out_buffer = buffer_swap; |
| 364 | table_ptr_ptr++; |
| 365 | } |
| 366 | } |
| 367 | |