/* Copyright (C) 2002 Jean-Marc Valin
   File: vbr.c

   VBR-related routines

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/
| 34 | |
| 35 | #ifdef HAVE_CONFIG_H |
| 36 | #include "config.h" |
| 37 | #endif |
| 38 | |
| 39 | #include "vbr.h" |
| 40 | #include <math.h> |
| 41 | |
| 42 | |
/* Square of v.  The argument is expanded twice, so it must not have
   side effects. */
#define sqr(v) ((v) * (v))

/* Energy floor: added to the frame energy before taking its log, and used
   to seed the noise estimate and the log-energy history. */
#define MIN_ENERGY 6000
/* Exponent applied to the frame energy before it is compared with, or
   accumulated into, the noise estimate. */
#define NOISE_POW .3
| 47 | |
| 48 | #ifndef DISABLE_VBR |
| 49 | |
/* Threshold table with 9 rows of 11 values each.  The trailing comment on
   each row is the label the row carries in this file; note that the 4 kbps
   row is stored last (row 8), not in bit-rate order.
   NOTE(review): the 11 columns presumably correspond to quality settings
   0..10 -- confirm against the code that indexes this table. */
const float vbr_nb_thresh[9][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* row 0: CNG     */
   { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,  0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /* row 1: 2 kbps  */
   {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.5f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f}, /* row 2: 6 kbps  */
   {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  3.9f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f}, /* row 3: 8 kbps  */
   {11.0f, 11.0f,  9.9f,  8.5f,  7.0f,  6.0f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f}, /* row 4: 11 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  8.0f,  7.0f,  6.0f,  5.0f,  3.0f}, /* row 5: 15 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  7.0f,  6.0f,  5.0f}, /* row 6: 18 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  9.5f,  7.5f}, /* row 7: 24 kbps */
   { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  2.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}  /* row 8: 4 kbps  */
};
| 61 | |
| 62 | |
/* Threshold table with 5 rows of 11 values each.  The trailing comment on
   each row is the label the row carries in this file; rows 0 and 1 hold
   -1 in every column.
   NOTE(review): the 11 columns presumably correspond to quality settings
   0..10 -- confirm against the code that indexes this table. */
const float vbr_hb_thresh[5][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* row 0: silence */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* row 1: 2 kbps  */
   {11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f}, /* row 2: 6 kbps  */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f}, /* row 3: 10 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f}  /* row 4: 18 kbps */
};
| 70 | |
/* Threshold table with 2 rows of 11 values each.  The trailing comment on
   each row is the label the row carries in this file.
   NOTE(review): the 11 columns presumably correspond to quality settings
   0..10 -- confirm against the code that indexes this table. */
const float vbr_uhb_thresh[2][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* row 0: silence */
   { 3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f}  /* row 1: 2 kbps  */
};
| 75 | |
| 76 | void vbr_init(VBRState *vbr) |
| 77 | { |
| 78 | int i; |
| 79 | |
| 80 | vbr->average_energy=0; |
| 81 | vbr->last_energy=1; |
| 82 | vbr->accum_sum=0; |
| 83 | vbr->energy_alpha=.1; |
| 84 | vbr->soft_pitch=0; |
| 85 | vbr->last_pitch_coef=0; |
| 86 | vbr->last_quality=0; |
| 87 | |
| 88 | vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW); |
| 89 | vbr->noise_accum_count=.05; |
| 90 | vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; |
| 91 | vbr->consec_noise=0; |
| 92 | |
| 93 | |
| 94 | for (i=0;i<VBR_MEMORY_SIZE;i++) |
| 95 | vbr->last_log_energy[i] = log(MIN_ENERGY); |
| 96 | } |
| 97 | |
| 98 | |
/*
  This function should analyse the signal and decide how critical the
  coding error will be perceptually. The following factors should be
  taken into account:

  -Attacks (positive energy derivative) should be coded with more bits

  -Stationary voiced segments should receive more bits

  -Segments with (very) low absolute energy should receive fewer bits (maybe
  only shaped noise?)

  -DTX for near-zero energy?

  -Stationary fricative segments should have fewer bits

  -Temporal masking: when energy slope is decreasing, decrease the bit-rate

  -Decrease bit-rate for males (low pitch)?

  -(wideband only) fewer bits in the high-band when signal is very
  non-stationary (harder to notice high-frequency noise)???

*/
| 123 | |
| 124 | float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef) |
| 125 | { |
| 126 | int i; |
| 127 | float ener=0, ener1=0, ener2=0; |
| 128 | float qual=7; |
| 129 | int va; |
| 130 | float log_energy; |
| 131 | float non_st=0; |
| 132 | float voicing; |
| 133 | float pow_ener; |
| 134 | |
| 135 | for (i=0;i<len>>1;i++) |
| 136 | ener1 += ((float)sig[i])*sig[i]; |
| 137 | |
| 138 | for (i=len>>1;i<len;i++) |
| 139 | ener2 += ((float)sig[i])*sig[i]; |
| 140 | ener=ener1+ener2; |
| 141 | |
| 142 | log_energy = log(ener+MIN_ENERGY); |
| 143 | for (i=0;i<VBR_MEMORY_SIZE;i++) |
| 144 | non_st += sqr(log_energy-vbr->last_log_energy[i]); |
| 145 | non_st = non_st/(30*VBR_MEMORY_SIZE); |
| 146 | if (non_st>1) |
| 147 | non_st=1; |
| 148 | |
| 149 | voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4); |
| 150 | vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener; |
| 151 | vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; |
| 152 | pow_ener = pow(ener,NOISE_POW); |
| 153 | if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY) |
| 154 | vbr->noise_accum = .05*pow_ener; |
| 155 | |
| 156 | if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level) |
| 157 | || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level) |
| 158 | || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level) |
| 159 | || (voicing<0 && non_st < .05)) |
| 160 | { |
| 161 | float tmp; |
| 162 | va = 0; |
| 163 | vbr->consec_noise++; |
| 164 | if (pow_ener > 3*vbr->noise_level) |
| 165 | tmp = 3*vbr->noise_level; |
| 166 | else |
| 167 | tmp = pow_ener; |
| 168 | if (vbr->consec_noise>=4) |
| 169 | { |
| 170 | vbr->noise_accum = .95*vbr->noise_accum + .05*tmp; |
| 171 | vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; |
| 172 | } |
| 173 | } else { |
| 174 | va = 1; |
| 175 | vbr->consec_noise=0; |
| 176 | } |
| 177 | |
| 178 | if (pow_ener < vbr->noise_level && ener>MIN_ENERGY) |
| 179 | { |
| 180 | vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener; |
| 181 | vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; |
| 182 | } |
| 183 | |
| 184 | /* Checking for very low absolute energy */ |
| 185 | if (ener < 30000) |
| 186 | { |
| 187 | qual -= .7; |
| 188 | if (ener < 10000) |
| 189 | qual-=.7; |
| 190 | if (ener < 3000) |
| 191 | qual-=.7; |
| 192 | } else { |
| 193 | float short_diff, long_diff; |
| 194 | short_diff = log((ener+1)/(1+vbr->last_energy)); |
| 195 | long_diff = log((ener+1)/(1+vbr->average_energy)); |
| 196 | /*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/ |
| 197 | |
| 198 | if (long_diff<-5) |
| 199 | long_diff=-5; |
| 200 | if (long_diff>2) |
| 201 | long_diff=2; |
| 202 | |
| 203 | if (long_diff>0) |
| 204 | qual += .6*long_diff; |
| 205 | if (long_diff<0) |
| 206 | qual += .5*long_diff; |
| 207 | if (short_diff>0) |
| 208 | { |
| 209 | if (short_diff>5) |
| 210 | short_diff=5; |
| 211 | qual += .5*short_diff; |
| 212 | } |
| 213 | /* Checking for energy increases */ |
| 214 | if (ener2 > 1.6*ener1) |
| 215 | qual += .5; |
| 216 | } |
| 217 | vbr->last_energy = ener; |
| 218 | vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef; |
| 219 | qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4)); |
| 220 | |
| 221 | if (qual < vbr->last_quality) |
| 222 | qual = .5*qual + .5*vbr->last_quality; |
| 223 | if (qual<4) |
| 224 | qual=4; |
| 225 | if (qual>10) |
| 226 | qual=10; |
| 227 | |
| 228 | /* |
| 229 | if (vbr->consec_noise>=2) |
| 230 | qual-=1.3; |
| 231 | if (vbr->consec_noise>=5) |
| 232 | qual-=1.3; |
| 233 | if (vbr->consec_noise>=12) |
| 234 | qual-=1.3; |
| 235 | */ |
| 236 | if (vbr->consec_noise>=3) |
| 237 | qual=4; |
| 238 | |
| 239 | if (vbr->consec_noise) |
| 240 | qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3)); |
| 241 | if (qual<0) |
| 242 | qual=0; |
| 243 | |
| 244 | if (ener<60000) |
| 245 | { |
| 246 | if (vbr->consec_noise>2) |
| 247 | qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); |
| 248 | if (ener<10000&&vbr->consec_noise>2) |
| 249 | qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); |
| 250 | if (qual<0) |
| 251 | qual=0; |
| 252 | qual += .3*log(.0001+ener/60000.0); |
| 253 | } |
| 254 | if (qual<-1) |
| 255 | qual=-1; |
| 256 | |
| 257 | /*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level), va);*/ |
| 258 | |
| 259 | vbr->last_pitch_coef = pitch_coef; |
| 260 | vbr->last_quality = qual; |
| 261 | |
| 262 | for (i=VBR_MEMORY_SIZE-1;i>0;i--) |
| 263 | vbr->last_log_energy[i] = vbr->last_log_energy[i-1]; |
| 264 | vbr->last_log_energy[0] = log_energy; |
| 265 | |
| 266 | /*printf ("VBR: %f %f %f %d %f\n", (float)(log_energy-log(vbr->average_energy+MIN_ENERGY)), non_st, voicing, va, vbr->noise_level);*/ |
| 267 | |
| 268 | return qual; |
| 269 | } |
| 270 | |
| 271 | void vbr_destroy(VBRState *vbr) |
| 272 | { |
| 273 | } |
| 274 | |
| 275 | #endif /* #ifndef DISABLE_VBR */ |