Alexandre Savard | 0a32ed7 | 2012-08-07 19:26:46 -0400 | [diff] [blame] | 1 | /* Copyright (C) 2004 Jean-Marc Valin */ |
| 2 | /** |
| 3 | @file cb_search_arm4.h |
| 4 | @brief Fixed codebook functions (ARM4 version) |
| 5 | */ |
| 6 | /* |
| 7 | Redistribution and use in source and binary forms, with or without |
| 8 | modification, are permitted provided that the following conditions |
| 9 | are met: |
| 10 | |
| 11 | - Redistributions of source code must retain the above copyright |
| 12 | notice, this list of conditions and the following disclaimer. |
| 13 | |
| 14 | - Redistributions in binary form must reproduce the above copyright |
| 15 | notice, this list of conditions and the following disclaimer in the |
| 16 | documentation and/or other materials provided with the distribution. |
| 17 | |
| 18 | - Neither the name of the Xiph.org Foundation nor the names of its |
| 19 | contributors may be used to endorse or promote products derived from |
| 20 | this software without specific prior written permission. |
| 21 | |
| 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 23 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
| 26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 29 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 30 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 31 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 33 | */ |
| 34 | |
| 35 | /* This optimization is temporarily disabled until it is fixed to account for the fact |
| 36 | that "r" is now a 16-bit array */ |
| 37 | #if 0 |
| 38 | #define OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK |
| 39 | static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack) |
| 40 | { |
| 41 | int i, j, k; |
| 42 | //const signed char *shape; |
| 43 | for (i=0;i<shape_cb_size;i+=4) |
| 44 | { |
| 45 | |
| 46 | //shape = shape_cb; |
| 47 | E[0]=0; |
| 48 | E[1]=0; |
| 49 | E[2]=0; |
| 50 | E[3]=0; |
| 51 | |
| 52 | /* Compute codeword response using convolution with impulse response */ |
| 53 | for(j=0;j<subvect_size;j++) |
| 54 | { |
| 55 | #if 1 |
| 56 | spx_word16_t *res; |
| 57 | res = resp+j; |
| 58 | spx_word32_t resj0,resj1,resj2,resj3; |
| 59 | spx_word32_t dead1, dead2, dead3, dead4, dead5, dead6, dead7, dead8; |
| 60 | __asm__ __volatile__ ( |
| 61 | "mov %0, #0 \n\t" |
| 62 | "mov %1, #0 \n\t" |
| 63 | "mov %2, #0 \n\t" |
| 64 | "mov %3, #0 \n\t" |
| 65 | ".weighted%=: \n\t" |
| 66 | "ldrsb %8, [%6] \n\t" |
| 67 | "ldr %10, [%5], #-4 \n\t" |
| 68 | "mov %9, %6 \n\t" |
| 69 | "ldrsb %11, [%9, %7]! \n\t" |
| 70 | "mla %0, %10, %8, %0 \n\t" |
| 71 | "ldrsb %8, [%9, %7]! \n\t" |
| 72 | "mla %1, %10, %11, %1 \n\t" |
| 73 | "ldrsb %11, [%9, %7]! \n\t" |
| 74 | "mla %2, %10, %8, %2 \n\t" |
| 75 | "subs %4, %4, #1 \n\t" |
| 76 | "mla %3, %10, %11, %3 \n\t" |
| 77 | "add %6, %6, #1 \n\t" |
| 78 | "bne .weighted%= \n\t" |
| 79 | : "=r" (resj0), "=r" (resj1), "=r" (resj2), "=r" (resj3), |
| 80 | "=r" (dead1), "=r" (dead2), "=r" (dead3), "=r" (dead4), |
| 81 | "=r" (dead5), "=r" (dead6), "=r" (dead7), "=r" (dead8) |
| 82 | : "4" (j+1), "5" (r+j), "6" (shape_cb), "7" (subvect_size) |
| 83 | : "cc", "memory"); |
| 84 | #else |
| 85 | spx_word16_t *res; |
| 86 | res = resp+j; |
| 87 | spx_word32_t resj0=0; |
| 88 | spx_word32_t resj1=0; |
| 89 | spx_word32_t resj2=0; |
| 90 | spx_word32_t resj3=0; |
| 91 | for (k=0;k<=j;k++) |
| 92 | { |
| 93 | const signed char *shape=shape_cb+k; |
| 94 | resj0 = MAC16_16(resj0,*shape,r[j-k]); |
| 95 | shape += subvect_size; |
| 96 | resj1 = MAC16_16(resj1,*shape,r[j-k]); |
| 97 | shape += subvect_size; |
| 98 | resj2 = MAC16_16(resj2,*shape,r[j-k]); |
| 99 | shape += subvect_size; |
| 100 | resj3 = MAC16_16(resj3,*shape,r[j-k]); |
| 101 | shape += subvect_size; |
| 102 | } |
| 103 | #endif |
| 104 | |
| 105 | #ifdef FIXED_POINT |
| 106 | resj0 = SHR(resj0, 11); |
| 107 | resj1 = SHR(resj1, 11); |
| 108 | resj2 = SHR(resj2, 11); |
| 109 | resj3 = SHR(resj3, 11); |
| 110 | #else |
| 111 | resj0 *= 0.03125; |
| 112 | resj1 *= 0.03125; |
| 113 | resj2 *= 0.03125; |
| 114 | resj3 *= 0.03125; |
| 115 | #endif |
| 116 | |
| 117 | /* Compute codeword energy */ |
| 118 | E[0]=ADD32(E[0],MULT16_16(resj0,resj0)); |
| 119 | E[1]=ADD32(E[1],MULT16_16(resj1,resj1)); |
| 120 | E[2]=ADD32(E[2],MULT16_16(resj2,resj2)); |
| 121 | E[3]=ADD32(E[3],MULT16_16(resj3,resj3)); |
| 122 | *res = resj0; |
| 123 | res += subvect_size; |
| 124 | *res = resj1; |
| 125 | res += subvect_size; |
| 126 | *res = resj2; |
| 127 | res += subvect_size; |
| 128 | *res = resj3; |
| 129 | res += subvect_size; |
| 130 | } |
| 131 | resp += subvect_size<<2; |
| 132 | shape_cb += subvect_size<<2; |
| 133 | E+=4; |
| 134 | } |
| 135 | |
| 136 | } |
| 137 | #endif |