Tristan Matthews | 0a329cc | 2013-07-17 13:20:14 -0400 | [diff] [blame] | 1 | /* Copyright (C) 2004 Jean-Marc Valin */ |
| 2 | /** |
| 3 | @file ltp_arm4.h |
| 4 | @brief Long-Term Prediction functions (ARM4 version) |
| 5 | */ |
| 6 | /* |
| 7 | Redistribution and use in source and binary forms, with or without |
| 8 | modification, are permitted provided that the following conditions |
| 9 | are met: |
| 10 | |
| 11 | - Redistributions of source code must retain the above copyright |
| 12 | notice, this list of conditions and the following disclaimer. |
| 13 | |
| 14 | - Redistributions in binary form must reproduce the above copyright |
| 15 | notice, this list of conditions and the following disclaimer in the |
| 16 | documentation and/or other materials provided with the distribution. |
| 17 | |
| 18 | - Neither the name of the Xiph.org Foundation nor the names of its |
| 19 | contributors may be used to endorse or promote products derived from |
| 20 | this software without specific prior written permission. |
| 21 | |
| 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 23 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
| 26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 29 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 30 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 31 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 33 | */ |
| 34 | |
| 35 | #define OVERRIDE_INNER_PROD |
| 36 | spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) |
| 37 | { |
| 38 | spx_word32_t sum1=0,sum2=0; |
| 39 | spx_word16_t *deadx, *deady; |
| 40 | int deadlen, dead1, dead2, dead3, dead4, dead5, dead6; |
| 41 | __asm__ __volatile__ ( |
| 42 | "\tldrsh %5, [%0], #2 \n" |
| 43 | "\tldrsh %6, [%1], #2 \n" |
| 44 | ".inner_prod_loop%=:\n" |
| 45 | "\tsub %7, %7, %7\n" |
| 46 | "\tsub %10, %10, %10\n" |
| 47 | |
| 48 | "\tldrsh %8, [%0], #2 \n" |
| 49 | "\tldrsh %9, [%1], #2 \n" |
| 50 | "\tmla %7, %5, %6, %7\n" |
| 51 | "\tldrsh %5, [%0], #2 \n" |
| 52 | "\tldrsh %6, [%1], #2 \n" |
| 53 | "\tmla %10, %8, %9, %10\n" |
| 54 | "\tldrsh %8, [%0], #2 \n" |
| 55 | "\tldrsh %9, [%1], #2 \n" |
| 56 | "\tmla %7, %5, %6, %7\n" |
| 57 | "\tldrsh %5, [%0], #2 \n" |
| 58 | "\tldrsh %6, [%1], #2 \n" |
| 59 | "\tmla %10, %8, %9, %10\n" |
| 60 | |
| 61 | "\tldrsh %8, [%0], #2 \n" |
| 62 | "\tldrsh %9, [%1], #2 \n" |
| 63 | "\tmla %7, %5, %6, %7\n" |
| 64 | "\tldrsh %5, [%0], #2 \n" |
| 65 | "\tldrsh %6, [%1], #2 \n" |
| 66 | "\tmla %10, %8, %9, %10\n" |
| 67 | "\tldrsh %8, [%0], #2 \n" |
| 68 | "\tldrsh %9, [%1], #2 \n" |
| 69 | "\tmla %7, %5, %6, %7\n" |
| 70 | "\tldrsh %5, [%0], #2 \n" |
| 71 | "\tldrsh %6, [%1], #2 \n" |
| 72 | "\tmla %10, %8, %9, %10\n" |
| 73 | |
| 74 | "\tsubs %4, %4, #1\n" |
| 75 | "\tadd %2, %2, %7, asr #5\n" |
| 76 | "\tadd %3, %3, %10, asr #5\n" |
| 77 | "\tbne .inner_prod_loop%=\n" |
| 78 | : "=r" (deadx), "=r" (deady), "+r" (sum1), "+r" (sum2), |
| 79 | "=r" (deadlen), "=r" (dead1), "=r" (dead2), "=r" (dead3), |
| 80 | "=r" (dead4), "=r" (dead5), "=r" (dead6) |
| 81 | : "0" (x), "1" (y), "4" (len>>3) |
| 82 | : "cc" |
| 83 | ); |
| 84 | return (sum1+sum2)>>1; |
| 85 | } |
| 86 | |
| 87 | #define OVERRIDE_PITCH_XCORR |
| 88 | void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack) |
| 89 | { |
| 90 | int i,j; |
| 91 | for (i=0;i<nb_pitch;i+=4) |
| 92 | { |
| 93 | /* Compute correlation*/ |
| 94 | //corr[nb_pitch-1-i]=inner_prod(x, _y+i, len); |
| 95 | spx_word32_t sum1=0; |
| 96 | spx_word32_t sum2=0; |
| 97 | spx_word32_t sum3=0; |
| 98 | spx_word32_t sum4=0; |
| 99 | const spx_word16_t *y = _y+i; |
| 100 | const spx_word16_t *x = _x; |
| 101 | spx_word32_t y0, y1, y2, y3; |
| 102 | y0=*y++; |
| 103 | y1=*y++; |
| 104 | y2=*y++; |
| 105 | y3=*y++; |
| 106 | for (j=0;j<len;j+=4) |
| 107 | { |
| 108 | spx_word32_t part1, part2, part3, part4, x0; |
| 109 | spx_word32_t dead1; |
| 110 | __asm__ __volatile__ ( |
| 111 | #ifdef SHORTCUTS |
| 112 | "\tldrsh %10, [%8], #4 \n" |
| 113 | "\tmul %4, %10, %0 \n" |
| 114 | "\tldrsh %15, [%8], #4 \n" |
| 115 | "\tmul %5, %10, %1 \n" |
| 116 | "\tldrsh %0, [%9], #2 \n" |
| 117 | "\tmul %6, %10, %2 \n" |
| 118 | "\tldrsh %1, [%9], #2 \n" |
| 119 | "\tmul %7, %10, %3 \n" |
| 120 | |
| 121 | |
| 122 | "\tmla %4, %15, %2, %4 \n" |
| 123 | "\tldrsh %2, [%9], #2 \n" |
| 124 | "\tmla %5, %15, %3, %5 \n" |
| 125 | "\tldrsh %3, [%9], #2 \n" |
| 126 | "\tmla %6, %15, %0, %6 \n" |
| 127 | "\tmla %7, %15, %1, %7 \n" |
| 128 | |
| 129 | #else |
| 130 | "\tldrsh %10, [%8], #2 \n" |
| 131 | "\tmul %4, %10, %0 \n" |
| 132 | "\tmul %5, %10, %1 \n" |
| 133 | "\tmul %6, %10, %2 \n" |
| 134 | "\tmul %7, %10, %3 \n" |
| 135 | |
| 136 | "\tldrsh %10, [%8], #2 \n" |
| 137 | "\tldrsh %0, [%9], #2 \n" |
| 138 | "\tmla %4, %10, %1, %4 \n" |
| 139 | "\tmla %5, %10, %2, %5 \n" |
| 140 | "\tmla %6, %10, %3, %6 \n" |
| 141 | "\tmla %7, %10, %0, %7 \n" |
| 142 | |
| 143 | "\tldrsh %10, [%8], #2 \n" |
| 144 | "\tldrsh %1, [%9], #2 \n" |
| 145 | "\tmla %4, %10, %2, %4 \n" |
| 146 | "\tmla %5, %10, %3, %5 \n" |
| 147 | "\tmla %6, %10, %0, %6 \n" |
| 148 | "\tmla %7, %10, %1, %7 \n" |
| 149 | |
| 150 | "\tldrsh %10, [%8], #2 \n" |
| 151 | "\tldrsh %2, [%9], #2 \n" |
| 152 | "\tmla %4, %10, %3, %4 \n" |
| 153 | "\tmla %5, %10, %0, %5 \n" |
| 154 | "\tmla %6, %10, %1, %6 \n" |
| 155 | "\tmla %7, %10, %2, %7 \n" |
| 156 | |
| 157 | "\tldrsh %3, [%9], #2 \n" |
| 158 | #endif |
| 159 | |
| 160 | "\tldr %10, %11 \n" |
| 161 | "\tldr %15, %12 \n" |
| 162 | "\tadd %4, %10, %4, asr #6 \n" |
| 163 | "\tstr %4, %11 \n" |
| 164 | "\tldr %10, %13 \n" |
| 165 | "\tadd %5, %15, %5, asr #6 \n" |
| 166 | "\tstr %5, %12 \n" |
| 167 | "\tldr %15, %14 \n" |
| 168 | "\tadd %6, %10, %6, asr #6 \n" |
| 169 | "\tadd %7, %15, %7, asr #6 \n" |
| 170 | "\tstr %6, %13 \n" |
| 171 | "\tstr %7, %14 \n" |
| 172 | |
| 173 | : "+r" (y0), "+r" (y1), "+r" (y2), "+r" (y3), |
| 174 | "=r" (part1), "=r" (part2), "=r" (part3), "=r" (part4), |
| 175 | "+r" (x), "+r" (y), "=r" (x0), "+m" (sum1), |
| 176 | "+m" (sum2), "+m" (sum3), "+m" (sum4), "=r" (dead1) |
| 177 | : |
| 178 | : "cc", "memory" |
| 179 | ); |
| 180 | } |
| 181 | corr[nb_pitch-1-i]=sum1; |
| 182 | corr[nb_pitch-2-i]=sum2; |
| 183 | corr[nb_pitch-3-i]=sum3; |
| 184 | corr[nb_pitch-4-i]=sum4; |
| 185 | } |
| 186 | |
| 187 | } |