Tristan Matthews | 0a329cc | 2013-07-17 13:20:14 -0400 | [diff] [blame] | 1 | /******************************************************************************** |
| 2 | ** |
| 3 | ** ITU-T G.722.1 (2005-05) - Fixed point implementation for main body and Annex C |
| 4 | ** > Software Release 2.1 (2008-06) |
| 5 | ** (Simple repackaging; no change from 2005-05 Release 2.0 code) |
| 6 | ** |
| 7 | ** © 2004 Polycom, Inc. |
| 8 | ** |
| 9 | ** All rights reserved. |
| 10 | ** |
| 11 | ********************************************************************************/ |
| 12 | |
| 13 | /******************************************************************************** |
| 14 | * Filename: dct_type_iv_s.c |
| 15 | * |
| 16 | * Purpose: Discrete Cosine Transform, Type IV used for inverse MLT |
| 17 | * |
| 18 | * The basis functions are |
| 19 | * |
| 20 | * cos(PI*(t+0.5)*(k+0.5)/block_length) |
| 21 | * |
| 22 | * for time t and basis function number k. Due to the symmetry of the expression |
| 23 | * in t and k, it is clear that the forward and inverse transforms are the same. |
| 24 | * |
| 25 | *********************************************************************************/ |
| 26 | |
| 27 | /*************************************************************************** |
| 28 | Include files |
| 29 | ***************************************************************************/ |
| 30 | #include "defs.h" |
| 31 | #include "count.h" |
| 32 | #include "dct4_s.h" |
| 33 | |
| 34 | /*************************************************************************** |
| 35 | External variable declarations |
| 36 | ***************************************************************************/ |
| 37 | extern Word16 syn_bias_7khz[DCT_LENGTH]; |
| 38 | extern Word16 dither[DCT_LENGTH]; |
| 39 | extern Word16 max_dither[MAX_DCT_LENGTH]; |
| 40 | |
| 41 | extern Word16 dct_core_s[DCT_LENGTH_DIV_32][DCT_LENGTH_DIV_32]; |
| 42 | extern cos_msin_t s_cos_msin_2[DCT_LENGTH_DIV_32]; |
| 43 | extern cos_msin_t s_cos_msin_4[DCT_LENGTH_DIV_16]; |
| 44 | extern cos_msin_t s_cos_msin_8[DCT_LENGTH_DIV_8]; |
| 45 | extern cos_msin_t s_cos_msin_16[DCT_LENGTH_DIV_4]; |
| 46 | extern cos_msin_t s_cos_msin_32[DCT_LENGTH_DIV_2]; |
| 47 | extern cos_msin_t s_cos_msin_64[DCT_LENGTH]; |
| 48 | extern cos_msin_t *s_cos_msin_table[]; |
| 49 | |
| 50 | /******************************************************************************** |
| 51 | Function: dct_type_iv_s |
| 52 | |
| 53 | Syntax: void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length) |
| 54 | |
| 55 | |
| 56 | Description: Discrete Cosine Transform, Type IV used for inverse MLT |
| 57 | |
| 58 | Design Notes: |
| 59 | |
| 60 | WMOPS: 7kHz | 24kbit | 32kbit |
| 61 | -------|--------------|---------------- |
| 62 | AVG | 1.74 | 1.74 |
| 63 | -------|--------------|---------------- |
| 64 | MAX | 1.74 | 1.74 |
| 65 | -------|--------------|---------------- |
| 66 | |
| 67 | 14kHz | 24kbit | 32kbit | 48kbit |
| 68 | -------|--------------|----------------|---------------- |
| 69 | AVG | 3.62 | 3.62 | 3.62 |
| 70 | -------|--------------|----------------|---------------- |
| 71 | MAX | 3.62 | 3.62 | 3.62 |
| 72 | -------|--------------|----------------|---------------- |
| 73 | |
| 74 | ********************************************************************************/ |
| 75 | |
| 76 | void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length) |
| 77 | { |
| 78 | Word16 buffer_a[MAX_DCT_LENGTH], buffer_b[MAX_DCT_LENGTH], buffer_c[MAX_DCT_LENGTH]; |
| 79 | Word16 *in_ptr, *in_ptr_low, *in_ptr_high, *next_in_base; |
| 80 | Word16 *out_ptr_low, *out_ptr_high, *next_out_base; |
| 81 | Word16 *out_buffer, *in_buffer, *buffer_swap; |
| 82 | Word16 in_val_low, in_val_high; |
| 83 | Word16 out_val_low, out_val_high; |
| 84 | Word16 in_low_even, in_low_odd; |
| 85 | Word16 in_high_even, in_high_odd; |
| 86 | Word16 out_low_even, out_low_odd; |
| 87 | Word16 out_high_even, out_high_odd; |
| 88 | Word16 *pair_ptr; |
| 89 | Word16 cos_even, cos_odd, msin_even, msin_odd; |
| 90 | Word16 set_span, set_count, set_count_log, pairs_left, sets_left; |
| 91 | Word16 i,k; |
| 92 | Word16 index; |
| 93 | Word16 dummy; |
| 94 | Word32 sum; |
| 95 | cos_msin_t **table_ptr_ptr, *cos_msin_ptr; |
| 96 | |
| 97 | Word32 acca; |
| 98 | Word16 temp; |
| 99 | |
| 100 | Word16 dct_length_log; |
| 101 | Word16 *dither_ptr; |
| 102 | |
| 103 | /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ |
| 104 | /* Do the sum/difference butterflies, the first part of */ |
| 105 | /* converting one N-point transform into 32 - 10 point transforms */ |
| 106 | /* transforms, where N = 1 << DCT_LENGTH_LOG. */ |
| 107 | /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ |
| 108 | test(); |
| 109 | if (dct_length==DCT_LENGTH) |
| 110 | { |
| 111 | dct_length_log = DCT_LENGTH_LOG; |
| 112 | move16(); |
| 113 | dither_ptr = dither; |
| 114 | move16(); |
| 115 | } |
| 116 | else |
| 117 | { |
| 118 | dct_length_log = MAX_DCT_LENGTH_LOG; |
| 119 | move16(); |
| 120 | dither_ptr = max_dither; |
| 121 | move16(); |
| 122 | } |
| 123 | |
| 124 | in_buffer = input; |
| 125 | move16(); |
| 126 | out_buffer = buffer_a; |
| 127 | move16(); |
| 128 | |
| 129 | index=0; |
| 130 | move16(); |
| 131 | |
| 132 | i=0; |
| 133 | move16(); |
| 134 | |
| 135 | for (set_count_log = 0; set_count_log <= dct_length_log - 2; set_count_log++) |
| 136 | { |
| 137 | |
| 138 | /*===========================================================*/ |
| 139 | /* Initialization for the loop over sets at the current size */ |
| 140 | /*===========================================================*/ |
| 141 | |
| 142 | /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */ |
| 143 | set_span = shr_nocheck(dct_length,set_count_log); |
| 144 | |
| 145 | set_count = shl_nocheck(1,set_count_log); |
| 146 | in_ptr = in_buffer; |
| 147 | move16(); |
| 148 | next_out_base = out_buffer; |
| 149 | move16(); |
| 150 | |
| 151 | /*=====================================*/ |
| 152 | /* Loop over all the sets of this size */ |
| 153 | /*=====================================*/ |
| 154 | temp = sub(index,1); |
| 155 | test(); |
| 156 | if(temp < 0) |
| 157 | { |
| 158 | for (sets_left = set_count;sets_left > 0;sets_left--) |
| 159 | { |
| 160 | |
| 161 | /*||||||||||||||||||||||||||||||||||||||||||||*/ |
| 162 | /* Set up output pointers for the current set */ |
| 163 | /*||||||||||||||||||||||||||||||||||||||||||||*/ |
| 164 | /* pointer arithmetic */ |
| 165 | out_ptr_low = next_out_base; |
| 166 | move16(); |
| 167 | next_out_base += set_span; |
| 168 | move16(); |
| 169 | out_ptr_high = next_out_base; |
| 170 | move16(); |
| 171 | |
| 172 | /*||||||||||||||||||||||||||||||||||||||||||||||||||*/ |
| 173 | /* Loop over all the butterflies in the current set */ |
| 174 | /*||||||||||||||||||||||||||||||||||||||||||||||||||*/ |
| 175 | |
| 176 | do |
| 177 | { |
| 178 | in_val_low = *in_ptr++; |
| 179 | move16(); |
| 180 | in_val_high = *in_ptr++; |
| 181 | move16(); |
| 182 | |
| 183 | /* BEST METHOD OF GETTING RID OF BIAS, BUT COMPUTATIONALLY UNPLEASANT */ |
| 184 | /* ALTERNATIVE METHOD, SMEARS BIAS OVER THE ENTIRE FRAME, COMPUTATIONALLY SIMPLEST. */ |
| 185 | /* IF THIS WORKS, IT'S PREFERABLE */ |
| 186 | |
| 187 | dummy = add(in_val_low,dither_ptr[i++]); |
| 188 | // blp: addition of two 16bits vars, there's no way |
| 189 | // they'll overflow a 32bit var |
| 190 | //acca = L_add(dummy,in_val_high); |
| 191 | acca = dummy + in_val_high; |
| 192 | out_val_low = extract_l(L_shr_nocheck(acca,1)); |
| 193 | |
| 194 | dummy = add(in_val_low,dither_ptr[i++]); |
| 195 | // blp: addition of two 16bits vars, there's no way |
| 196 | // they'll overflow a 32bit var |
| 197 | //acca = L_add(dummy,-in_val_high); |
| 198 | acca = dummy - in_val_high; |
| 199 | out_val_high = extract_l(L_shr_nocheck(acca,1)); |
| 200 | |
| 201 | *out_ptr_low++ = out_val_low; |
| 202 | move16(); |
| 203 | *--out_ptr_high = out_val_high; |
| 204 | move16(); |
| 205 | |
| 206 | test(); |
| 207 | |
| 208 | /* this involves comparison of pointers */ |
| 209 | /* pointer arithmetic */ |
| 210 | |
| 211 | } while (out_ptr_low < out_ptr_high); |
| 212 | |
| 213 | } /* End of loop over sets of the current size */ |
| 214 | } |
| 215 | else |
| 216 | { |
| 217 | for (sets_left = set_count; sets_left > 0; sets_left--) |
| 218 | { |
| 219 | /*||||||||||||||||||||||||||||||||||||||||||||*/ |
| 220 | /* Set up output pointers for the current set */ |
| 221 | /*||||||||||||||||||||||||||||||||||||||||||||*/ |
| 222 | |
| 223 | out_ptr_low = next_out_base; |
| 224 | move16(); |
| 225 | next_out_base += set_span; |
| 226 | move16(); |
| 227 | out_ptr_high = next_out_base; |
| 228 | move16(); |
| 229 | |
| 230 | /*||||||||||||||||||||||||||||||||||||||||||||||||||*/ |
| 231 | /* Loop over all the butterflies in the current set */ |
| 232 | /*||||||||||||||||||||||||||||||||||||||||||||||||||*/ |
| 233 | |
| 234 | do |
| 235 | { |
| 236 | in_val_low = *in_ptr++; |
| 237 | move16(); |
| 238 | in_val_high = *in_ptr++; |
| 239 | move16(); |
| 240 | |
| 241 | out_val_low = add(in_val_low,in_val_high); |
| 242 | out_val_high = add(in_val_low,negate(in_val_high)); |
| 243 | |
| 244 | *out_ptr_low++ = out_val_low; |
| 245 | move16(); |
| 246 | *--out_ptr_high = out_val_high; |
| 247 | move16(); |
| 248 | |
| 249 | test(); |
| 250 | } while (out_ptr_low < out_ptr_high); |
| 251 | |
| 252 | } /* End of loop over sets of the current size */ |
| 253 | } |
| 254 | |
| 255 | /*============================================================*/ |
| 256 | /* Decide which buffers to use as input and output next time. */ |
| 257 | /* Except for the first time (when the input buffer is the */ |
| 258 | /* subroutine input) we just alternate the local buffers. */ |
| 259 | /*============================================================*/ |
| 260 | |
| 261 | in_buffer = out_buffer; |
| 262 | move16(); |
| 263 | |
| 264 | test(); |
| 265 | if (out_buffer == buffer_a) |
| 266 | { |
| 267 | out_buffer = buffer_b; |
| 268 | move16(); |
| 269 | } |
| 270 | else |
| 271 | { |
| 272 | out_buffer = buffer_a; |
| 273 | move16(); |
| 274 | } |
| 275 | |
| 276 | index = add(index,1); |
| 277 | } /* End of loop over set sizes */ |
| 278 | |
| 279 | |
| 280 | /*++++++++++++++++++++++++++++++++*/ |
| 281 | /* Do 32 - 10 point transforms */ |
| 282 | /*++++++++++++++++++++++++++++++++*/ |
| 283 | |
| 284 | pair_ptr = in_buffer; |
| 285 | move16(); |
| 286 | buffer_swap = buffer_c; |
| 287 | move16(); |
| 288 | |
| 289 | for (pairs_left = 1 << (dct_length_log - 1); pairs_left > 0; pairs_left--) |
| 290 | { |
| 291 | for ( k=0; k<CORE_SIZE; k++ ) |
| 292 | { |
| 293 | #if PJ_HAS_INT64 |
| 294 | /* blp: danger danger! not really compatible but faster */ |
| 295 | pj_int64_t sum64=0; |
| 296 | move32(); |
| 297 | |
| 298 | for ( i=0; i<CORE_SIZE; i++ ) |
| 299 | { |
| 300 | sum64 += L_mult(pair_ptr[i], dct_core_s[i][k]); |
| 301 | } |
| 302 | sum = L_saturate(sum64); |
| 303 | #else |
| 304 | sum=0L; |
| 305 | move32(); |
| 306 | |
| 307 | for ( i=0; i<CORE_SIZE; i++ ) |
| 308 | { |
| 309 | sum = L_mac(sum, pair_ptr[i],dct_core_s[i][k]); |
| 310 | } |
| 311 | #endif |
| 312 | buffer_swap[k] = itu_round(sum); |
| 313 | } |
| 314 | |
| 315 | pair_ptr += CORE_SIZE; |
| 316 | move16(); |
| 317 | buffer_swap += CORE_SIZE; |
| 318 | move16(); |
| 319 | } |
| 320 | |
| 321 | for (i=0;i<dct_length;i++) |
| 322 | { |
| 323 | in_buffer[i] = buffer_c[i]; |
| 324 | move16(); |
| 325 | } |
| 326 | |
| 327 | table_ptr_ptr = s_cos_msin_table; |
| 328 | move16(); |
| 329 | |
| 330 | /*++++++++++++++++++++++++++++++*/ |
| 331 | /* Perform rotation butterflies */ |
| 332 | /*++++++++++++++++++++++++++++++*/ |
| 333 | index=0; |
| 334 | move16(); |
| 335 | |
| 336 | for (set_count_log = dct_length_log - 2 ; set_count_log >= 0; set_count_log--) |
| 337 | { |
| 338 | |
| 339 | /*===========================================================*/ |
| 340 | /* Initialization for the loop over sets at the current size */ |
| 341 | /*===========================================================*/ |
| 342 | |
| 343 | /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */ |
| 344 | set_span = shr_nocheck(dct_length,set_count_log); |
| 345 | |
| 346 | set_count = shl_nocheck(1,set_count_log); |
| 347 | next_in_base = in_buffer; |
| 348 | move16(); |
| 349 | test(); |
| 350 | if (set_count_log == 0) |
| 351 | { |
| 352 | next_out_base = output; |
| 353 | move16(); |
| 354 | } |
| 355 | else |
| 356 | { |
| 357 | next_out_base = out_buffer; |
| 358 | move16(); |
| 359 | } |
| 360 | |
| 361 | /*=====================================*/ |
| 362 | /* Loop over all the sets of this size */ |
| 363 | /*=====================================*/ |
| 364 | |
| 365 | for (sets_left = set_count; sets_left > 0; sets_left--) |
| 366 | { |
| 367 | |
| 368 | /*|||||||||||||||||||||||||||||||||||||||||*/ |
| 369 | /* Set up the pointers for the current set */ |
| 370 | /*|||||||||||||||||||||||||||||||||||||||||*/ |
| 371 | |
| 372 | in_ptr_low = next_in_base; |
| 373 | move16(); |
| 374 | |
| 375 | temp = shr_nocheck(set_span,1); |
| 376 | in_ptr_high = in_ptr_low + temp; |
| 377 | move16(); |
| 378 | |
| 379 | next_in_base += set_span; |
| 380 | move16(); |
| 381 | |
| 382 | out_ptr_low = next_out_base; |
| 383 | move16(); |
| 384 | |
| 385 | next_out_base += set_span; |
| 386 | move16(); |
| 387 | out_ptr_high = next_out_base; |
| 388 | move16(); |
| 389 | |
| 390 | cos_msin_ptr = *table_ptr_ptr; |
| 391 | move16(); |
| 392 | |
| 393 | /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/ |
| 394 | /* Loop over all the butterfly pairs in the current set */ |
| 395 | /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/ |
| 396 | |
| 397 | do |
| 398 | { |
| 399 | in_low_even = *in_ptr_low++; |
| 400 | move16(); |
| 401 | in_low_odd = *in_ptr_low++; |
| 402 | move16(); |
| 403 | in_high_even = *in_ptr_high++; |
| 404 | move16(); |
| 405 | in_high_odd = *in_ptr_high++; |
| 406 | move16(); |
| 407 | cos_even = cos_msin_ptr[0].cosine; |
| 408 | move16(); |
| 409 | msin_even = cos_msin_ptr[0].minus_sine; |
| 410 | move16(); |
| 411 | cos_odd = cos_msin_ptr[1].cosine; |
| 412 | move16(); |
| 413 | msin_odd = cos_msin_ptr[1].minus_sine; |
| 414 | move16(); |
| 415 | cos_msin_ptr += 2; |
| 416 | |
| 417 | sum = 0L; |
| 418 | move32(); |
| 419 | |
| 420 | sum = L_mac(sum,cos_even,in_low_even); |
| 421 | sum = L_mac(sum,negate(msin_even),in_high_even); |
| 422 | out_low_even = itu_round(L_shl_nocheck(sum,1)); |
| 423 | |
| 424 | sum = 0L; |
| 425 | move32(); |
| 426 | sum = L_mac(sum,msin_even,in_low_even); |
| 427 | sum = L_mac(sum,cos_even,in_high_even); |
| 428 | out_high_even = itu_round(L_shl_nocheck(sum,1)); |
| 429 | |
| 430 | sum = 0L; |
| 431 | move32(); |
| 432 | sum = L_mac(sum,cos_odd,in_low_odd); |
| 433 | sum = L_mac(sum,msin_odd,in_high_odd); |
| 434 | out_low_odd = itu_round(L_shl_nocheck(sum,1)); |
| 435 | |
| 436 | sum = 0L; |
| 437 | move32(); |
| 438 | sum = L_mac(sum,msin_odd,in_low_odd); |
| 439 | sum = L_mac(sum,negate(cos_odd),in_high_odd); |
| 440 | out_high_odd = itu_round(L_shl_nocheck(sum,1)); |
| 441 | |
| 442 | *out_ptr_low++ = out_low_even; |
| 443 | move16(); |
| 444 | *--out_ptr_high = out_high_even; |
| 445 | move16(); |
| 446 | *out_ptr_low++ = out_low_odd; |
| 447 | move16(); |
| 448 | *--out_ptr_high = out_high_odd; |
| 449 | move16(); |
| 450 | |
| 451 | test(); |
| 452 | } while (out_ptr_low < out_ptr_high); |
| 453 | |
| 454 | } /* End of loop over sets of the current size */ |
| 455 | |
| 456 | /*=============================================*/ |
| 457 | /* Swap input and output buffers for next time */ |
| 458 | /*=============================================*/ |
| 459 | |
| 460 | buffer_swap = in_buffer; |
| 461 | move16(); |
| 462 | in_buffer = out_buffer; |
| 463 | move16(); |
| 464 | out_buffer = buffer_swap; |
| 465 | move16(); |
| 466 | |
| 467 | index = add(index,1); |
| 468 | table_ptr_ptr++; |
| 469 | } |
| 470 | /*------------------------------------ |
| 471 | |
| 472 | ADD IN BIAS FOR OUTPUT |
| 473 | |
| 474 | -----------------------------------*/ |
| 475 | if (dct_length==DCT_LENGTH) |
| 476 | { |
| 477 | for(i=0;i<320;i++) |
| 478 | { |
| 479 | // blp: addition of two 16bits vars, there's no way |
| 480 | // they'll overflow a 32bit var |
| 481 | //sum = L_add(output[i],syn_bias_7khz[i]); |
| 482 | sum = output[i] + syn_bias_7khz[i]; |
| 483 | acca = L_sub(sum,32767); |
| 484 | test(); |
| 485 | if (acca > 0) |
| 486 | { |
| 487 | sum = 32767L; |
| 488 | move32(); |
| 489 | } |
| 490 | // blp: addition of two 16bits vars, there's no way |
| 491 | // they'll overflow 32bit var |
| 492 | //acca = L_add(sum,32768L); |
| 493 | acca = sum + 32768; |
| 494 | test(); |
| 495 | if (acca < 0) |
| 496 | { |
| 497 | sum = -32768L; |
| 498 | move32(); |
| 499 | } |
| 500 | output[i] = extract_l(sum); |
| 501 | } |
| 502 | } |
| 503 | } |
| 504 | |