Tristan Matthews | 0a329cc | 2013-07-17 13:20:14 -0400 | [diff] [blame] | 1 | /* Copyright (C) 2002-2006 Jean-Marc Valin */ |
| 2 | /** |
| 3 | @file nb_celp.h |
| 4 | @brief Narrowband CELP encoder/decoder |
| 5 | */ |
| 6 | /* |
| 7 | Redistribution and use in source and binary forms, with or without |
| 8 | modification, are permitted provided that the following conditions |
| 9 | are met: |
| 10 | |
| 11 | - Redistributions of source code must retain the above copyright |
| 12 | notice, this list of conditions and the following disclaimer. |
| 13 | |
| 14 | - Redistributions in binary form must reproduce the above copyright |
| 15 | notice, this list of conditions and the following disclaimer in the |
| 16 | documentation and/or other materials provided with the distribution. |
| 17 | |
| 18 | - Neither the name of the Xiph.org Foundation nor the names of its |
| 19 | contributors may be used to endorse or promote products derived from |
| 20 | this software without specific prior written permission. |
| 21 | |
| 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 23 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
| 26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 29 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 30 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 31 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 33 | |
| 34 | */ |
| 35 | |
| 36 | #ifndef NB_CELP_H |
| 37 | #define NB_CELP_H |
| 38 | |
| 39 | #include "modes.h" |
| 40 | #include <speex/speex_bits.h> |
| 41 | #include <speex/speex_callbacks.h> |
| 42 | #include "vbr.h" |
| 43 | #include "filters.h" |
| 44 | |
| 45 | #ifdef VORBIS_PSYCHO |
| 46 | #include "vorbis_psy.h" |
| 47 | #endif |
| 48 | |
| 49 | /** Structure representing the full state of the narrowband encoder. Some members exist only when VORBIS_PSYCHO is defined or when DISABLE_VBR is not defined, so the layout depends on build flags. */ |
| 50 | typedef struct EncState { |
| 51 | const SpeexMode *mode; /**< Mode corresponding to the state */ |
| 52 | int first; /**< Is this the first frame? */ |
| 53 | int frameSize; /**< Size of frames */ |
| 54 | int subframeSize; /**< Size of sub-frames */ |
| 55 | int nbSubframes; /**< Number of sub-frames */ |
| 56 | int windowSize; /**< Analysis (LPC) window length */ |
| 57 | int lpcSize; /**< LPC order */ |
| 58 | int min_pitch; /**< Minimum pitch value allowed */ |
| 59 | int max_pitch; /**< Maximum pitch value allowed */ |
| 60 | |
| 61 | spx_word32_t cumul_gain; /**< Product of previously used pitch gains (Q10) */ |
| 62 | int bounded_pitch; /**< Next frame should not rely on previous frames for pitch */ |
| 63 | int ol_pitch; /**< Open-loop pitch */ |
| 64 | int ol_voiced; /**< Open-loop voiced/non-voiced decision */ |
| 65 | int *pitch; /**< NOTE(review): undocumented; presumably per-sub-frame pitch values - confirm against the encoder implementation */ |
| 66 | |
| 67 | #ifdef VORBIS_PSYCHO |
| 68 | VorbisPsy *psy; /**< Psychoacoustic model state (only present when VORBIS_PSYCHO is defined) */ |
| 69 | float *psy_window; /**< NOTE(review): presumably the analysis window buffer for the psychoacoustic model - confirm */ |
| 70 | float *curve; /**< NOTE(review): presumably the current masking curve - confirm */ |
| 71 | float *old_curve; /**< NOTE(review): presumably the masking curve of the previous frame - confirm */ |
| 72 | #endif |
| 73 | |
| 74 | spx_word16_t gamma1; /**< Perceptual filter: A(z/gamma1) */ |
| 75 | spx_word16_t gamma2; /**< Perceptual filter: A(z/gamma2) */ |
| 76 | spx_word16_t lpc_floor; /**< Noise floor multiplier for A[0] in LPC analysis */ |
| 77 | char *stack; /**< Pseudo-stack allocation for temporary memory */ |
| 78 | spx_word16_t *winBuf; /**< Input buffer (original signal) */ |
| 79 | spx_word16_t *excBuf; /**< Excitation buffer */ |
| 80 | spx_word16_t *exc; /**< Start of excitation frame */ |
| 81 | spx_word16_t *swBuf; /**< Weighted signal buffer */ |
| 82 | spx_word16_t *sw; /**< Start of weighted signal frame */ |
| 83 | const spx_word16_t *window; /**< Temporary (Hanning) window */ |
| 84 | const spx_word16_t *lagWindow; /**< Window applied to auto-correlation */ |
| 85 | spx_lsp_t *old_lsp; /**< LSPs for previous frame */ |
| 86 | spx_lsp_t *old_qlsp; /**< Quantized LSPs for previous frame */ |
| 87 | spx_mem_t *mem_sp; /**< Filter memory for signal synthesis */ |
| 88 | spx_mem_t *mem_sw; /**< Filter memory for perceptually-weighted signal */ |
| 89 | spx_mem_t *mem_sw_whole; /**< Filter memory for perceptually-weighted signal (whole frame) */ |
| 90 | spx_mem_t *mem_exc; /**< Filter memory for excitation (whole frame) */ |
| 91 | spx_mem_t *mem_exc2; /**< Filter memory for excitation (whole frame); NOTE(review): same description as mem_exc - confirm how the two differ */ |
| 92 | spx_mem_t mem_hp[2]; /**< High-pass filter memory */ |
| 93 | spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */ |
| 94 | spx_word16_t *innov_rms_save; /**< If non-NULL, innovation RMS is copied here */ |
| 95 | |
| 96 | #ifndef DISABLE_VBR |
| 97 | VBRState *vbr; /**< State of the VBR data */ |
| 98 | float vbr_quality; /**< Quality setting for VBR encoding */ |
| 99 | float relative_quality; /**< Relative quality that will be needed by VBR */ |
| 100 | spx_int32_t vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */ |
| 101 | spx_int32_t vbr_max; /**< Max bit-rate allowed in VBR mode */ |
| 102 | int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */ |
| 103 | int dtx_enabled; /**< 1 for enabling DTX, 0 otherwise */ |
| 104 | int dtx_count; /**< Number of consecutive DTX frames */ |
| 105 | spx_int32_t abr_enabled; /**< ABR setting (in bps), 0 if off */ |
| 106 | float abr_drift; /**< NOTE(review): undocumented ABR bookkeeping; presumably bit-rate drift from the target - confirm */ |
| 107 | float abr_drift2; /**< NOTE(review): undocumented ABR bookkeeping; presumably a second drift estimate - confirm */ |
| 108 | float abr_count; /**< NOTE(review): undocumented ABR bookkeeping; presumably a frame counter - confirm */ |
| 109 | #endif /* #ifndef DISABLE_VBR */ |
| 110 | |
| 111 | int complexity; /**< Complexity setting (0-10 from least complex to most complex) */ |
| 112 | spx_int32_t sampling_rate; /**< NOTE(review): undocumented; presumably the sampling rate in Hz - confirm */ |
| 113 | int plc_tuning; /**< NOTE(review): undocumented; presumably a packet-loss-concealment tuning value - confirm */ |
| 114 | int encode_submode; /**< NOTE(review): undocumented; presumably whether the sub-mode ID is written to the bit-stream - confirm */ |
| 115 | const SpeexSubmode * const *submodes; /**< Sub-mode data */ |
| 116 | int submodeID; /**< Activated sub-mode */ |
| 117 | int submodeSelect; /**< Mode chosen by the user (may differ from submodeID if VAD is on) */ |
| 118 | int isWideband; /**< Is this used as part of the embedded wideband codec */ |
| 119 | int highpass_enabled; /**< Is the input filter enabled */ |
| 120 | } EncState; |
| 121 | |
| 122 | /** Structure representing the full state of the narrowband decoder */ |
| 123 | typedef struct DecState { |
| 124 | const SpeexMode *mode; /**< Mode corresponding to the state */ |
| 125 | int first; /**< Is this the first frame? */ |
| 126 | int count_lost; /**< Was the last frame lost? NOTE(review): the name suggests a count of lost frames rather than a flag - confirm */ |
| 127 | int frameSize; /**< Size of frames */ |
| 128 | int subframeSize; /**< Size of sub-frames */ |
| 129 | int nbSubframes; /**< Number of sub-frames */ |
| 130 | int lpcSize; /**< LPC order */ |
| 131 | int min_pitch; /**< Minimum pitch value allowed */ |
| 132 | int max_pitch; /**< Maximum pitch value allowed */ |
| 133 | spx_int32_t sampling_rate; /**< NOTE(review): undocumented; presumably the sampling rate in Hz - confirm */ |
| 134 | |
| 135 | spx_word16_t last_ol_gain; /**< Open-loop gain for previous frame */ |
| 136 | |
| 137 | char *stack; /**< Pseudo-stack allocation for temporary memory */ |
| 138 | spx_word16_t *excBuf; /**< Excitation buffer */ |
| 139 | spx_word16_t *exc; /**< Start of excitation frame */ |
| 140 | spx_lsp_t *old_qlsp; /**< Quantized LSPs for previous frame */ |
| 141 | spx_coef_t *interp_qlpc; /**< Interpolated quantized LPCs */ |
| 142 | spx_mem_t *mem_sp; /**< Filter memory for synthesis signal */ |
| 143 | spx_mem_t mem_hp[2]; /**< High-pass filter memory */ |
| 144 | spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */ |
| 145 | spx_word16_t *innov_save; /**< If non-NULL, innovation is copied here */ |
| 146 | |
| 147 | spx_word16_t level; /**< NOTE(review): undocumented; presumably a running signal-level estimate - confirm */ |
| 148 | spx_word16_t max_level; /**< NOTE(review): undocumented; presumably the tracked maximum of level - confirm */ |
| 149 | spx_word16_t min_level; /**< NOTE(review): undocumented; presumably the tracked minimum of level - confirm */ |
| 150 | |
| 151 | /* This is used in packet loss concealment */ |
| 152 | int last_pitch; /**< Pitch of last correctly decoded frame */ |
| 153 | spx_word16_t last_pitch_gain; /**< Pitch gain of last correctly decoded frame */ |
| 154 | spx_word16_t pitch_gain_buf[3]; /**< Pitch gain of last decoded frames */ |
| 155 | int pitch_gain_buf_idx; /**< Tail of the buffer */ |
| 156 | spx_int32_t seed; /**< Seed used for random number generation */ |
| 157 | |
| 158 | int encode_submode; /**< NOTE(review): undocumented; presumably whether a sub-mode ID is expected in the bit-stream - confirm */ |
| 159 | const SpeexSubmode * const *submodes; /**< Sub-mode data */ |
| 160 | int submodeID; /**< Activated sub-mode */ |
| 161 | int lpc_enh_enabled; /**< 1 when LPC enhancer is on, 0 otherwise */ |
| 162 | SpeexCallback speex_callbacks[SPEEX_MAX_CALLBACKS]; /**< Callback table with SPEEX_MAX_CALLBACKS entries */ |
| 163 | |
| 164 | SpeexCallback user_callback; /**< NOTE(review): undocumented; presumably the handler for user-defined in-band requests - confirm */ |
| 165 | |
| 166 | /*Vocoder data*/ |
| 167 | spx_word16_t voc_m1; /**< NOTE(review): undocumented vocoder state - confirm meaning */ |
| 168 | spx_word32_t voc_m2; /**< NOTE(review): undocumented vocoder state - confirm meaning */ |
| 169 | spx_word16_t voc_mean; /**< NOTE(review): undocumented vocoder state; presumably a running mean - confirm */ |
| 170 | int voc_offset; /**< NOTE(review): undocumented vocoder state - confirm meaning */ |
| 171 | |
| 172 | int dtx_enabled; /**< NOTE(review): undocumented here; presumably non-zero while DTX is in effect for the decoded stream - confirm */ |
| 173 | int isWideband; /**< Is this used as part of the embedded wideband codec */ |
| 174 | int highpass_enabled; /**< Is the input filter enabled */ |
| 175 | } DecState; |
| 176 | |
| 177 | /** Initializes encoder state for mode m and returns it as an opaque pointer (NOTE(review): presumably an EncState* - confirm) */ |
| 178 | void *nb_encoder_init(const SpeexMode *m); |
| 179 | |
| 180 | /** De-allocates encoder state resources (NOTE(review): state is presumably the pointer returned by nb_encoder_init() - confirm) */ |
| 181 | void nb_encoder_destroy(void *state); |
| 182 | |
| 183 | /** Encodes one frame read from in and writes it to bits (NOTE(review): the sample type behind in and the meaning of the int result are not visible in this header - confirm) */ |
| 184 | int nb_encode(void *state, void *in, SpeexBits *bits); |
| 185 | |
| 186 | |
| 187 | /** Initializes decoder state for mode m and returns it as an opaque pointer (NOTE(review): presumably a DecState* - confirm) */ |
| 188 | void *nb_decoder_init(const SpeexMode *m); |
| 189 | |
| 190 | /** De-allocates decoder state resources (NOTE(review): state is presumably the pointer returned by nb_decoder_init() - confirm) */ |
| 191 | void nb_decoder_destroy(void *state); |
| 192 | |
| 193 | /** Decodes one frame read from bits and writes it to out (NOTE(review): the sample type behind out and the meaning of the int result are not visible in this header - confirm) */ |
| 194 | int nb_decode(void *state, SpeexBits *bits, void *out); |
| 195 | |
| 196 | /** ioctl-like function for controlling a narrowband encoder; request selects the operation and ptr carries its argument (NOTE(review): request codes are defined elsewhere - confirm) */ |
| 197 | int nb_encoder_ctl(void *state, int request, void *ptr); |
| 198 | |
| 199 | /** ioctl-like function for controlling a narrowband decoder; request selects the operation and ptr carries its argument (NOTE(review): request codes are defined elsewhere - confirm) */ |
| 200 | int nb_decoder_ctl(void *state, int request, void *ptr); |
| 201 | |
| 202 | |
| 203 | #endif |