blob: 234dc2eee092353c356e7e01e0fd161012316965 [file] [log] [blame]
Benny Prijonoeb30bf52006-03-04 20:43:52 +00001/* Copyright (C) 2002 Jean-Marc Valin
2 File: cb_search.c
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30*/
31
32#ifdef HAVE_CONFIG_H
33#include "config.h"
34#endif
35
36#include "cb_search.h"
37#include "filters.h"
38#include "stack_alloc.h"
39#include "vq.h"
40#include "misc.h"
41
42#ifdef _USE_SSE
43#include "cb_search_sse.h"
44#elif defined(ARM4_ASM) || defined(ARM5E_ASM)
45#include "cb_search_arm4.h"
46#elif defined(BFIN_ASM)
47#include "cb_search_bfin.h"
48#endif
49
50#ifndef OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
51static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
52{
53 int i, j, k;
54 VARDECL(spx_word16_t *shape);
55 ALLOC(shape, subvect_size, spx_word16_t);
56 for (i=0;i<shape_cb_size;i++)
57 {
58 spx_word16_t *res;
59
60 res = resp+i*subvect_size;
61 for (k=0;k<subvect_size;k++)
62 shape[k] = (spx_word16_t)shape_cb[i*subvect_size+k];
63 E[i]=0;
64
65 /* Compute codeword response using convolution with impulse response */
66 for(j=0;j<subvect_size;j++)
67 {
68 spx_word32_t resj=0;
69 spx_word16_t res16;
70 for (k=0;k<=j;k++)
71 resj = MAC16_16(resj,shape[k],r[j-k]);
72#ifdef FIXED_POINT
73 res16 = EXTRACT16(SHR32(resj, 11));
74#else
75 res16 = 0.03125f*resj;
76#endif
77 /* Compute codeword energy */
78 E[i]=MAC16_16(E[i],res16,res16);
79 res[j] = res16;
80 /*printf ("%d\n", (int)res[j]);*/
81 }
82 }
83
84}
85#endif
86
87#ifndef OVERRIDE_TARGET_UPDATE
88static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *r, int len)
89{
90 int n;
91 int q=0;
92 for (n=0;n<len;n++,q++)
93 t[n] = SUB32(t[n],MULT16_16_Q11_32(g,r[q]));
94}
95#endif
96
97
98
99static void split_cb_search_shape_sign_N1(
100spx_sig_t target[], /* target vector */
101spx_coef_t ak[], /* LPCs for this subframe */
102spx_coef_t awk1[], /* Weighted LPCs for this subframe */
103spx_coef_t awk2[], /* Weighted LPCs for this subframe */
104const void *par, /* Codebook/search parameters*/
105int p, /* number of LPC coeffs */
106int nsf, /* number of samples in subframe */
107spx_sig_t *exc,
108spx_word16_t *r,
109SpeexBits *bits,
110char *stack,
111int complexity,
112int update_target
113)
114{
115 int i,j,m,q;
116#ifndef FIXED_POINT
117 int n;
118#endif
119 VARDECL(spx_word16_t *resp);
120#ifdef _USE_SSE
121 VARDECL(__m128 *resp2);
122 VARDECL(__m128 *E);
123#else
124 spx_word16_t *resp2;
125 VARDECL(spx_word32_t *E);
126#endif
127 VARDECL(spx_word16_t *t);
128 VARDECL(spx_sig_t *e);
129 const signed char *shape_cb;
130 int shape_cb_size, subvect_size, nb_subvect;
131 const split_cb_params *params;
132 int N=2;
133 int best_index;
134 spx_word32_t best_dist;
135 int have_sign;
136 N=complexity;
137 if (N>10)
138 N=10;
139 if (N<1)
140 N=1;
141
142 params = (const split_cb_params *) par;
143 subvect_size = params->subvect_size;
144 nb_subvect = params->nb_subvect;
145 shape_cb_size = 1<<params->shape_bits;
146 shape_cb = params->shape_cb;
147 have_sign = params->have_sign;
148 ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
149#ifdef _USE_SSE
150 ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
151 ALLOC(E, shape_cb_size>>2, __m128);
152#else
153 resp2 = resp;
154 ALLOC(E, shape_cb_size, spx_word32_t);
155#endif
156 ALLOC(t, nsf, spx_word16_t);
157 ALLOC(e, nsf, spx_sig_t);
158
159 /* FIXME: make that adaptive? */
160 for (i=0;i<nsf;i++)
161 t[i]=EXTRACT16(PSHR32(target[i],6));
162
163 compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
164
165 for (i=0;i<nb_subvect;i++)
166 {
167 spx_word16_t *x=t+subvect_size*i;
168 /*Find new n-best based on previous n-best j*/
169 if (have_sign)
170 vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
171 else
172 vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
173
174 speex_bits_pack(bits,best_index,params->shape_bits+have_sign);
175
176 {
177 int rind;
178 spx_word16_t *res;
179 spx_word16_t sign=1;
180 rind = best_index;
181 if (rind>=shape_cb_size)
182 {
183 sign=-1;
184 rind-=shape_cb_size;
185 }
186 res = resp+rind*subvect_size;
187 if (sign>0)
188 for (m=0;m<subvect_size;m++)
189 t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]);
190 else
191 for (m=0;m<subvect_size;m++)
192 t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]);
193
194#ifdef FIXED_POINT
195 if (sign)
196 {
197 for (j=0;j<subvect_size;j++)
198 e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
199 } else {
200 for (j=0;j<subvect_size;j++)
201 e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
202 }
203#else
204 for (j=0;j<subvect_size;j++)
205 e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
206#endif
207
208 }
209
210 for (m=0;m<subvect_size;m++)
211 {
212 spx_word16_t g;
213 int rind;
214 spx_word16_t sign=1;
215 rind = best_index;
216 if (rind>=shape_cb_size)
217 {
218 sign=-1;
219 rind-=shape_cb_size;
220 }
221
222 q=subvect_size-m;
223#ifdef FIXED_POINT
224 g=sign*shape_cb[rind*subvect_size+m];
225 target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
226#else
227 g=sign*0.03125*shape_cb[rind*subvect_size+m];
228 /*FIXME: I think that one too can be replaced by target_update */
229 for (n=subvect_size*(i+1);n<nsf;n++,q++)
230 t[n] = SUB32(t[n],g*r[q]);
231#endif
232 }
233 }
234
235 /* Update excitation */
236 /* FIXME: We could update the excitation directly above */
237 for (j=0;j<nsf;j++)
238 exc[j]=ADD32(exc[j],e[j]);
239
240 /* Update target: only update target if necessary */
241 if (update_target)
242 {
243 VARDECL(spx_sig_t *r2);
244 ALLOC(r2, nsf, spx_sig_t);
245 syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
246 for (j=0;j<nsf;j++)
247 target[j]=SUB32(target[j],r2[j]);
248 }
249}
250
251
252
253void split_cb_search_shape_sign(
254spx_sig_t target[], /* target vector */
255spx_coef_t ak[], /* LPCs for this subframe */
256spx_coef_t awk1[], /* Weighted LPCs for this subframe */
257spx_coef_t awk2[], /* Weighted LPCs for this subframe */
258const void *par, /* Codebook/search parameters*/
259int p, /* number of LPC coeffs */
260int nsf, /* number of samples in subframe */
261spx_sig_t *exc,
262spx_word16_t *r,
263SpeexBits *bits,
264char *stack,
265int complexity,
266int update_target
267)
268{
269 int i,j,k,m,n,q;
270 VARDECL(spx_word16_t *resp);
271#ifdef _USE_SSE
272 VARDECL(__m128 *resp2);
273 VARDECL(__m128 *E);
274#else
275 spx_word16_t *resp2;
276 VARDECL(spx_word32_t *E);
277#endif
278 VARDECL(spx_word16_t *t);
279 VARDECL(spx_sig_t *e);
280 VARDECL(spx_sig_t *r2);
281 VARDECL(spx_word16_t *tmp);
282 VARDECL(spx_word32_t *ndist);
283 VARDECL(spx_word32_t *odist);
284 VARDECL(int *itmp);
285 VARDECL(spx_word16_t **ot2);
286 VARDECL(spx_word16_t **nt2);
287 spx_word16_t **ot, **nt;
288 VARDECL(int **nind);
289 VARDECL(int **oind);
290 VARDECL(int *ind);
291 const signed char *shape_cb;
292 int shape_cb_size, subvect_size, nb_subvect;
293 const split_cb_params *params;
294 int N=2;
295 VARDECL(int *best_index);
296 VARDECL(spx_word32_t *best_dist);
297 VARDECL(int *best_nind);
298 VARDECL(int *best_ntarget);
299 int have_sign;
300 N=complexity;
301 if (N>10)
302 N=10;
303 if (N<1)
304 N=1;
305
306 if (N==1)
307 {
308 split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,complexity,update_target);
309 return;
310 }
311 ALLOC(ot2, N, spx_word16_t*);
312 ALLOC(nt2, N, spx_word16_t*);
313 ALLOC(oind, N, int*);
314 ALLOC(nind, N, int*);
315
316 params = (const split_cb_params *) par;
317 subvect_size = params->subvect_size;
318 nb_subvect = params->nb_subvect;
319 shape_cb_size = 1<<params->shape_bits;
320 shape_cb = params->shape_cb;
321 have_sign = params->have_sign;
322 ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
323#ifdef _USE_SSE
324 ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
325 ALLOC(E, shape_cb_size>>2, __m128);
326#else
327 resp2 = resp;
328 ALLOC(E, shape_cb_size, spx_word32_t);
329#endif
330 ALLOC(t, nsf, spx_word16_t);
331 ALLOC(e, nsf, spx_sig_t);
332 ALLOC(r2, nsf, spx_sig_t);
333 ALLOC(ind, nb_subvect, int);
334
335 ALLOC(tmp, 2*N*nsf, spx_word16_t);
336 for (i=0;i<N;i++)
337 {
338 ot2[i]=tmp+2*i*nsf;
339 nt2[i]=tmp+(2*i+1)*nsf;
340 }
341 ot=ot2;
342 nt=nt2;
343 ALLOC(best_index, N, int);
344 ALLOC(best_dist, N, spx_word32_t);
345 ALLOC(best_nind, N, int);
346 ALLOC(best_ntarget, N, int);
347 ALLOC(ndist, N, spx_word32_t);
348 ALLOC(odist, N, spx_word32_t);
349
350 ALLOC(itmp, 2*N*nb_subvect, int);
351 for (i=0;i<N;i++)
352 {
353 nind[i]=itmp+2*i*nb_subvect;
354 oind[i]=itmp+(2*i+1)*nb_subvect;
355 }
356
357 /* FIXME: make that adaptive? */
358 for (i=0;i<nsf;i++)
359 t[i]=EXTRACT16(PSHR32(target[i],6));
360
361 for (j=0;j<N;j++)
362 speex_move(&ot[j][0], t, nsf*sizeof(spx_word16_t));
363
364 /* Pre-compute codewords response and energy */
365 compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
366
367 for (j=0;j<N;j++)
368 odist[j]=0;
369
370 /*For all subvectors*/
371 for (i=0;i<nb_subvect;i++)
372 {
373 /*"erase" nbest list*/
374 for (j=0;j<N;j++)
375 ndist[j]=VERY_LARGE32;
376
377 /*For all n-bests of previous subvector*/
378 for (j=0;j<N;j++)
379 {
380 spx_word16_t *x=ot[j]+subvect_size*i;
381 spx_word32_t tener = 0;
382 for (m=0;m<subvect_size;m++)
383 tener = MAC16_16(tener, x[m],x[m]);
384#ifdef FIXED_POINT
385 tener = SHR32(tener,1);
386#else
387 tener *= .5;
388#endif
389 /*Find new n-best based on previous n-best j*/
390 if (have_sign)
391 vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
392 else
393 vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
394
395 /*For all new n-bests*/
396 for (k=0;k<N;k++)
397 {
398 /* Compute total distance (including previous sub-vectors */
399 spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener);
400
401 /*update n-best list*/
402 if (err<ndist[N-1])
403 {
404 for (m=0;m<N;m++)
405 {
406 if (err < ndist[m])
407 {
408 for (n=N-1;n>m;n--)
409 {
410 ndist[n] = ndist[n-1];
411 best_nind[n] = best_nind[n-1];
412 best_ntarget[n] = best_ntarget[n-1];
413 }
414 ndist[m] = err;
415 best_nind[n] = best_index[k];
416 best_ntarget[n] = j;
417 break;
418 }
419 }
420 }
421 }
422 if (i==0)
423 break;
424 }
425 for (j=0;j<N;j++)
426 {
427 /*previous target (we don't care what happened before*/
428 for (m=(i+1)*subvect_size;m<nsf;m++)
429 nt[j][m]=ot[best_ntarget[j]][m];
430
431 /* New code: update the rest of the target only if it's worth it */
432 for (m=0;m<subvect_size;m++)
433 {
434 spx_word16_t g;
435 int rind;
436 spx_word16_t sign=1;
437 rind = best_nind[j];
438 if (rind>=shape_cb_size)
439 {
440 sign=-1;
441 rind-=shape_cb_size;
442 }
443
444 q=subvect_size-m;
445#ifdef FIXED_POINT
446 g=sign*shape_cb[rind*subvect_size+m];
447 target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
448#else
449 g=sign*0.03125*shape_cb[rind*subvect_size+m];
450 /*FIXME: I think that one too can be replaced by target_update */
451 for (n=subvect_size*(i+1);n<nsf;n++,q++)
452 nt[j][n] = SUB32(nt[j][n],g*r[q]);
453#endif
454 }
455
456 for (q=0;q<nb_subvect;q++)
457 nind[j][q]=oind[best_ntarget[j]][q];
458 nind[j][i]=best_nind[j];
459 }
460
461 /*update old-new data*/
462 /* just swap pointers instead of a long copy */
463 {
464 spx_word16_t **tmp2;
465 tmp2=ot;
466 ot=nt;
467 nt=tmp2;
468 }
469 for (j=0;j<N;j++)
470 for (m=0;m<nb_subvect;m++)
471 oind[j][m]=nind[j][m];
472 for (j=0;j<N;j++)
473 odist[j]=ndist[j];
474 }
475
476 /*save indices*/
477 for (i=0;i<nb_subvect;i++)
478 {
479 ind[i]=nind[0][i];
480 speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
481 }
482
483 /* Put everything back together */
484 for (i=0;i<nb_subvect;i++)
485 {
486 int rind;
487 spx_word16_t sign=1;
488 rind = ind[i];
489 if (rind>=shape_cb_size)
490 {
491 sign=-1;
492 rind-=shape_cb_size;
493 }
494#ifdef FIXED_POINT
495 if (sign==1)
496 {
497 for (j=0;j<subvect_size;j++)
498 e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
499 } else {
500 for (j=0;j<subvect_size;j++)
501 e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
502 }
503#else
504 for (j=0;j<subvect_size;j++)
505 e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
506#endif
507 }
508 /* Update excitation */
509 for (j=0;j<nsf;j++)
510 exc[j]=ADD32(exc[j],e[j]);
511
512 /* Update target: only update target if necessary */
513 if (update_target)
514 {
515 syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
516 for (j=0;j<nsf;j++)
517 target[j]=SUB32(target[j],r2[j]);
518 }
519}
520
521
522void split_cb_shape_sign_unquant(
523spx_sig_t *exc,
524const void *par, /* non-overlapping codebook */
525int nsf, /* number of samples in subframe */
526SpeexBits *bits,
527char *stack
528)
529{
530 int i,j;
531 VARDECL(int *ind);
532 VARDECL(int *signs);
533 const signed char *shape_cb;
534 int shape_cb_size, subvect_size, nb_subvect;
535 const split_cb_params *params;
536 int have_sign;
537
538 params = (const split_cb_params *) par;
539 subvect_size = params->subvect_size;
540 nb_subvect = params->nb_subvect;
541 shape_cb_size = 1<<params->shape_bits;
542 shape_cb = params->shape_cb;
543 have_sign = params->have_sign;
544
545 ALLOC(ind, nb_subvect, int);
546 ALLOC(signs, nb_subvect, int);
547
548 /* Decode codewords and gains */
549 for (i=0;i<nb_subvect;i++)
550 {
551 if (have_sign)
552 signs[i] = speex_bits_unpack_unsigned(bits, 1);
553 else
554 signs[i] = 0;
555 ind[i] = speex_bits_unpack_unsigned(bits, params->shape_bits);
556 }
557 /* Compute decoded excitation */
558 for (i=0;i<nb_subvect;i++)
559 {
560 spx_word16_t s=1;
561 if (signs[i])
562 s=-1;
563#ifdef FIXED_POINT
564 if (s==1)
565 {
566 for (j=0;j<subvect_size;j++)
567 exc[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5);
568 } else {
569 for (j=0;j<subvect_size;j++)
570 exc[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5));
571 }
572#else
573 for (j=0;j<subvect_size;j++)
574 exc[subvect_size*i+j]+=s*0.03125*shape_cb[ind[i]*subvect_size+j];
575#endif
576 }
577}
578
579void noise_codebook_quant(
580spx_sig_t target[], /* target vector */
581spx_coef_t ak[], /* LPCs for this subframe */
582spx_coef_t awk1[], /* Weighted LPCs for this subframe */
583spx_coef_t awk2[], /* Weighted LPCs for this subframe */
584const void *par, /* Codebook/search parameters*/
585int p, /* number of LPC coeffs */
586int nsf, /* number of samples in subframe */
587spx_sig_t *exc,
588spx_word16_t *r,
589SpeexBits *bits,
590char *stack,
591int complexity,
592int update_target
593)
594{
595 int i;
596 VARDECL(spx_sig_t *tmp);
597 ALLOC(tmp, nsf, spx_sig_t);
598 residue_percep_zero(target, ak, awk1, awk2, tmp, nsf, p, stack);
599
600 for (i=0;i<nsf;i++)
601 exc[i]+=tmp[i];
602 for (i=0;i<nsf;i++)
603 target[i]=0;
604
605}
606
607
608void noise_codebook_unquant(
609spx_sig_t *exc,
610const void *par, /* non-overlapping codebook */
611int nsf, /* number of samples in subframe */
612SpeexBits *bits,
613char *stack
614)
615{
616 speex_rand_vec(1, exc, nsf);
617}