1 /* Copyright (C) 2002 Jean-Marc Valin
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include "cb_search.h"
38 #include "stack_alloc.h"
43 #include "cb_search_sse.h"
44 #elif defined(ARM4_ASM) || defined(ARM5E_ASM)
45 #include "cb_search_arm4.h"
46 #elif defined(BFIN_ASM)
47 #include "cb_search_bfin.h"
50 #ifndef OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
/* Pre-compute, for each of the shape_cb_size codewords stored in shape_cb,
   the codeword's response to r and the energy of that response.

   shape_cb       codebook, read as shape_cb_size rows of subvect_size entries
   r              sequence the codeword is convolved with (indexed r[j-k])
   resp           response storage; codeword i owns resp + i*subvect_size
   resp2          not referenced by any line visible in this excerpt
   E              per-codeword energy accumulator, indexed by codeword
   stack          scratch area; presumably consumed by ALLOC -- TODO confirm

   NOTE(review): this excerpt omits several lines of the function (braces,
   local declarations, the header of the inner k loop, the store into res
   and whatever preprocessor lines choose between the two res16 formulas).
   The comments below describe only what is shown. */
51 static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
/* Scratch buffer holding the current codeword converted to spx_word16_t. */
54 VARDECL(spx_word16_t *shape);
55 ALLOC(shape, subvect_size, spx_word16_t);
/* One iteration per codeword. */
56 for (i=0;i<shape_cb_size;i++)
/* res addresses the slot of resp that belongs to codeword i. */
60 res = resp+i*subvect_size;
/* Copy codeword i out of the signed-char codebook into shape. */
61 for (k=0;k<subvect_size;k++)
62 shape[k] = (spx_word16_t)shape_cb[i*subvect_size+k];
65 /* Compute codeword response using convolution with impulse response */
66 for(j=0;j<subvect_size;j++)
/* Accumulate the shape[k] * r[j-k] terms of output sample j into resj
   (presumably MAC16_16(a,b,c) is a + b*c -- TODO confirm). */
71 resj = MAC16_16(resj,shape[k],r[j-k]);
/* Two alternative scalings of resj follow: a right shift by 11 bits and a
   multiplication by 0.03125f (1/32). Presumably these are the fixed-point
   and floating-point variants; the selecting lines are not visible. */
73 res16 = EXTRACT16(SHR32(resj, 11));
75 res16 = 0.03125f*resj;
77 /* Compute codeword energy */
/* Adds res16 squared to the running energy of codeword i. */
78 E[i]=MAC16_16(E[i],res16,res16);
80 /*printf ("%d\n", (int)res[j]);*/
87 #ifndef OVERRIDE_TARGET_UPDATE
/* Remove the contribution g * r from the first len entries of t:
   t[n] = t[n] - MULT16_16_Q11_32(g, r[q]), with q advancing together with n.

   t    vector updated in place
   g    scalar applied to every r entry
   r    vector that is scaled and subtracted
   len  number of entries of t to update

   NOTE(review): the declaration and initial value of q are not visible in
   this excerpt; presumably q starts at 0 -- TODO confirm. */
88 static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *r, int len)
92 for (n=0;n<len;n++,q++)
93 t[n] = SUB32(t[n],MULT16_16_Q11_32(g,r[q]));
/* Split-codebook shape/sign search that keeps a single candidate per
   sub-vector: every vq_nbest / vq_nbest_sign call below asks for 1 result.
   split_cb_search_shape_sign() forwards its arguments to this function.

   Visible flow:
     1. read the codebook geometry from par (a split_cb_params);
     2. build a 16-bit working copy t of target;
     3. pre-compute codeword responses and energies;
     4. for each sub-vector, pick the best codeword, pack its index into
        bits, adjust t and fill the matching slice of e;
     5. add e to exc and, in a section whose guard is not visible, subtract
        the output of syn_percep_zero() from target.

   NOTE(review): the tail of the parameter list (exc, r, bits, stack, ...),
   most local declarations, braces, branch conditions and preprocessor
   selectors are missing from this excerpt. Comments on alternative lines
   are hedged accordingly. */
99 static void split_cb_search_shape_sign_N1(
100 spx_sig_t target[], /* target vector */
101 spx_coef_t ak[], /* LPCs for this subframe */
102 spx_coef_t awk1[], /* Weighted LPCs for this subframe */
103 spx_coef_t awk2[], /* Weighted LPCs for this subframe */
104 const void *par, /* Codebook/search parameters*/
105 int p, /* number of LPC coeffs */
106 int nsf, /* number of samples in subframe */
/* Codeword responses, shape_cb_size * subvect_size entries. */
119 VARDECL(spx_word16_t *resp);
/* Alternative __m128 view of the responses; presumably the SSE build. */
121 VARDECL(__m128 *resp2);
/* Per-codeword energies. */
125 VARDECL(spx_word32_t *E);
/* Working copy of the target, nsf entries. */
127 VARDECL(spx_word16_t *t);
/* Selected codewords laid out over the sub-frame, nsf entries. */
128 VARDECL(spx_sig_t *e);
129 const signed char *shape_cb;
130 int shape_cb_size, subvect_size, nb_subvect;
131 const split_cb_params *params;
134 spx_word32_t best_dist;
/* Unpack the search parameters; the codebook holds 2^shape_bits entries. */
142 params = (const split_cb_params *) par;
143 subvect_size = params->subvect_size;
144 nb_subvect = params->nb_subvect;
145 shape_cb_size = 1<<params->shape_bits;
146 shape_cb = params->shape_cb;
147 have_sign = params->have_sign;
148 ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
/* Two alternative allocations of resp2/E follow: one in units of __m128
   (a quarter as many elements) and one plain spx_word32_t array. The
   selecting preprocessor lines are not visible. */
150 ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
151 ALLOC(E, shape_cb_size>>2, __m128);
154 ALLOC(E, shape_cb_size, spx_word32_t);
156 ALLOC(t, nsf, spx_word16_t);
157 ALLOC(e, nsf, spx_sig_t);
159 /* FIXME: make that adaptive? */
/* Scale target down by 6 bits and narrow it to 16 bits. */
161 t[i]=EXTRACT16(PSHR32(target[i],6));
/* Fill resp (and resp2) and E for the whole codebook. */
163 compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
/* One iteration per sub-vector. */
165 for (i=0;i<nb_subvect;i++)
/* x is the slice of the working target covered by sub-vector i. */
167 spx_word16_t *x=t+subvect_size*i;
168 /*Find new n-best based on previous n-best j*/
/* Alternative calls, presumably chosen by have_sign (condition not
   visible). Both request a single best entry. */
170 vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
172 vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
/* Emit the chosen index; shape_bits+have_sign is passed as the last
   argument, presumably the field width in bits. */
174 speex_bits_pack(bits,best_index,params->shape_bits+have_sign);
/* An index at or above shape_cb_size presumably carries the sign flag;
   the body of this test is not visible -- TODO confirm. */
181 if (rind>=shape_cb_size)
/* Response of the selected codeword. */
186 res = resp+rind*subvect_size;
/* Two alternative updates of the current slice of t: subtract the
   response, or add it. Presumably selected by sign. */
188 for (m=0;m<subvect_size;m++)
189 t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]);
191 for (m=0;m<subvect_size;m++)
192 t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]);
/* Three alternative ways of writing the chosen codeword into e: shifted
   left by SIG_SHIFT-5, the negation of that, or sign*0.03125*codeword
   (presumably the floating-point build). */
197 for (j=0;j<subvect_size;j++)
198 e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
200 for (j=0;j<subvect_size;j++)
201 e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
204 for (j=0;j<subvect_size;j++)
205 e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
/* For each entry m of the chosen codeword, update the part of t that lies
   beyond the current sub-vector. */
210 for (m=0;m<subvect_size;m++)
216 if (rind>=shape_cb_size)
/* First alternative: signed codebook entry handed to target_update(),
   which works on the t entries from subvect_size*(i+1) onward. */
224 g=sign*shape_cb[rind*subvect_size+m];
225 target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
/* Second alternative: entry scaled by 0.03125 with an explicit loop over
   the same range of t. */
227 g=sign*0.03125*shape_cb[rind*subvect_size+m];
228 /*FIXME: I think that one too can be replaced by target_update */
229 for (n=subvect_size*(i+1);n<nsf;n++,q++)
230 t[n] = SUB32(t[n],g*r[q]);
235 /* Update excitation */
236 /* FIXME: We could update the excitation directly above */
238 exc[j]=ADD32(exc[j],e[j]);
240 /* Update target: only update target if necessary */
/* r2 receives the output of syn_percep_zero() applied to e; that output
   is then subtracted from target. The guarding condition is not visible. */
243 VARDECL(spx_sig_t *r2);
244 ALLOC(r2, nsf, spx_sig_t);
245 syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
247 target[j]=SUB32(target[j],r2[j]);
/* Split-codebook shape/sign search that keeps N candidate paths while it
   walks the sub-vectors ("n-best" search).

   Visible flow:
     1. forward to split_cb_search_shape_sign_N1() (the guarding condition
        is not visible; presumably taken when N is 1 -- TODO confirm);
     2. allocate per-candidate target buffers (ot/nt) and per-candidate
        index histories (oind/nind), plus the shared resp, E, t, e, r2;
     3. build a 16-bit working copy t of target and copy it into ot[j];
     4. pre-compute codeword responses and energies;
     5. for each sub-vector, extend the previous candidates with the N best
        codewords, keep a sorted list in ndist/best_nind/best_ntarget, then
        build the new targets and index histories;
     6. pack the retained indices into bits, rebuild e from them, add e to
        exc and, in a section whose guard is not visible, subtract the
        output of syn_percep_zero() from target.

   NOTE(review): the tail of the parameter list, many local declarations,
   loop headers, braces, branch conditions and preprocessor selectors are
   missing from this excerpt; comments on alternative lines are hedged. */
253 void split_cb_search_shape_sign(
254 spx_sig_t target[], /* target vector */
255 spx_coef_t ak[], /* LPCs for this subframe */
256 spx_coef_t awk1[], /* Weighted LPCs for this subframe */
257 spx_coef_t awk2[], /* Weighted LPCs for this subframe */
258 const void *par, /* Codebook/search parameters*/
259 int p, /* number of LPC coeffs */
260 int nsf, /* number of samples in subframe */
/* Codeword responses, shape_cb_size * subvect_size entries. */
270 VARDECL(spx_word16_t *resp);
/* Alternative __m128 view of the responses; presumably the SSE build. */
272 VARDECL(__m128 *resp2);
/* Per-codeword energies. */
276 VARDECL(spx_word32_t *E);
/* Working copy of the target, nsf entries. */
278 VARDECL(spx_word16_t *t);
/* Selected codewords laid out over the sub-frame, and the buffer that
   receives the syn_percep_zero() output. */
279 VARDECL(spx_sig_t *e);
280 VARDECL(spx_sig_t *r2);
/* Backing storage for the 2*N candidate target buffers. */
281 VARDECL(spx_word16_t *tmp);
/* Distances of the new (ndist) and previous (odist) candidate lists. */
282 VARDECL(spx_word32_t *ndist);
283 VARDECL(spx_word32_t *odist);
/* Pointer tables into tmp; ot/nt are the "old" and "new" views. */
285 VARDECL(spx_word16_t **ot2);
286 VARDECL(spx_word16_t **nt2);
287 spx_word16_t **ot, **nt;
291 const signed char *shape_cb;
292 int shape_cb_size, subvect_size, nb_subvect;
293 const split_cb_params *params;
/* Output of vq_nbest*(): N indices and their distances. */
295 VARDECL(int *best_index);
296 VARDECL(spx_word32_t *best_dist);
/* Sorted candidate list: codeword index and originating previous path. */
297 VARDECL(int *best_nind);
298 VARDECL(int *best_ntarget);
/* Delegates the whole search to the single-candidate version. */
308 split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,complexity,update_target);
311 ALLOC(ot2, N, spx_word16_t*);
312 ALLOC(nt2, N, spx_word16_t*);
313 ALLOC(oind, N, int*);
314 ALLOC(nind, N, int*);
/* Unpack the search parameters; the codebook holds 2^shape_bits entries. */
316 params = (const split_cb_params *) par;
317 subvect_size = params->subvect_size;
318 nb_subvect = params->nb_subvect;
319 shape_cb_size = 1<<params->shape_bits;
320 shape_cb = params->shape_cb;
321 have_sign = params->have_sign;
322 ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
/* Two alternative allocations of resp2/E follow: one in units of __m128
   (a quarter as many elements) and one plain spx_word32_t array. The
   selecting preprocessor lines are not visible. */
324 ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
325 ALLOC(E, shape_cb_size>>2, __m128);
328 ALLOC(E, shape_cb_size, spx_word32_t);
330 ALLOC(t, nsf, spx_word16_t);
331 ALLOC(e, nsf, spx_sig_t);
332 ALLOC(r2, nsf, spx_sig_t);
333 ALLOC(ind, nb_subvect, int);
/* tmp is carved into 2*N slices of nsf entries; nt2[i] takes the
   odd-numbered slice of pair i. */
335 ALLOC(tmp, 2*N*nsf, spx_word16_t);
339 nt2[i]=tmp+(2*i+1)*nsf;
343 ALLOC(best_index, N, int);
344 ALLOC(best_dist, N, spx_word32_t);
345 ALLOC(best_nind, N, int);
346 ALLOC(best_ntarget, N, int);
347 ALLOC(ndist, N, spx_word32_t);
348 ALLOC(odist, N, spx_word32_t);
/* itmp is carved into 2*N slices of nb_subvect ints: nind[i] gets the
   even-numbered slice of pair i, oind[i] the odd-numbered one. */
350 ALLOC(itmp, 2*N*nb_subvect, int);
353 nind[i]=itmp+2*i*nb_subvect;
354 oind[i]=itmp+(2*i+1)*nb_subvect;
357 /* FIXME: make that adaptive? */
/* Scale target down by 6 bits and narrow it to 16 bits. */
359 t[i]=EXTRACT16(PSHR32(target[i],6));
/* Seed candidate j's "old" target with the working copy t. */
362 speex_move(&ot[j][0], t, nsf*sizeof(spx_word16_t));
364 /* Pre-compute codewords response and energy */
365 compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
370 /*For all subvectors*/
371 for (i=0;i<nb_subvect;i++)
373 /*"erase" nbest list*/
375 ndist[j]=VERY_LARGE32;
377 /*For all n-bests of previous subvector*/
/* x is the slice of candidate j's target covered by sub-vector i. */
380 spx_word16_t *x=ot[j]+subvect_size*i;
/* tener accumulates the energy of x and is then halved. */
381 spx_word32_t tener = 0;
382 for (m=0;m<subvect_size;m++)
383 tener = MAC16_16(tener, x[m],x[m]);
385 tener = SHR32(tener,1);
389 /*Find new n-best based on previous n-best j*/
/* Alternative calls, presumably chosen by have_sign (condition not
   visible). Both request N entries. */
391 vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
393 vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
395 /*For all new n-bests*/
398 /* Compute total distance (including previous sub-vectors) */
399 spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener);
401 /*update n-best list*/
/* Shift the existing entries one position further to open a slot at n,
   then store the new codeword index there (the loop header and the
   remaining stores are not visible). */
410 ndist[n] = ndist[n-1];
411 best_nind[n] = best_nind[n-1];
412 best_ntarget[n] = best_ntarget[n-1];
415 best_nind[n] = best_index[k];
427 /*previous target (we don't care what happened before)*/
/* Copy the not-yet-coded part of the originating candidate's target. */
428 for (m=(i+1)*subvect_size;m<nsf;m++)
429 nt[j][m]=ot[best_ntarget[j]][m];
431 /* New code: update the rest of the target only if it's worth it */
432 for (m=0;m<subvect_size;m++)
/* An index at or above shape_cb_size presumably carries the sign flag;
   the body of this test is not visible -- TODO confirm. */
438 if (rind>=shape_cb_size)
/* First alternative: signed codebook entry handed to target_update(),
   which works on nt[j] from subvect_size*(i+1) onward. */
446 g=sign*shape_cb[rind*subvect_size+m];
447 target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
/* Second alternative: entry scaled by 0.03125 with an explicit loop over
   the same range of nt[j]. */
449 g=sign*0.03125*shape_cb[rind*subvect_size+m];
450 /*FIXME: I think that one too can be replaced by target_update */
451 for (n=subvect_size*(i+1);n<nsf;n++,q++)
452 nt[j][n] = SUB32(nt[j][n],g*r[q]);
/* New index history = history of the originating candidate, with the
   entry for sub-vector i replaced by the newly selected codeword. */
456 for (q=0;q<nb_subvect;q++)
457 nind[j][q]=oind[best_ntarget[j]][q];
458 nind[j][i]=best_nind[j];
461 /*update old-new data*/
462 /* just swap pointers instead of a long copy */
/* The index histories, in contrast, are copied element by element. */
470 for (m=0;m<nb_subvect;m++)
471 oind[j][m]=nind[j][m];
/* Emit one index per sub-vector; shape_bits+have_sign is passed as the
   last argument, presumably the field width in bits. */
477 for (i=0;i<nb_subvect;i++)
480 speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
483 /* Put everything back together */
484 for (i=0;i<nb_subvect;i++)
489 if (rind>=shape_cb_size)
/* Three alternative ways of writing the chosen codeword into e: shifted
   left by SIG_SHIFT-5, the negation of that, or sign*0.03125*codeword
   (presumably the floating-point build). */
497 for (j=0;j<subvect_size;j++)
498 e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
500 for (j=0;j<subvect_size;j++)
501 e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
504 for (j=0;j<subvect_size;j++)
505 e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
508 /* Update excitation */
510 exc[j]=ADD32(exc[j],e[j]);
512 /* Update target: only update target if necessary */
/* r2 receives the output of syn_percep_zero() applied to e; that output
   is then subtracted from target. The guarding condition is not visible. */
515 syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
517 target[j]=SUB32(target[j],r2[j]);
/* Decoder counterpart of the split-codebook shape/sign search: reads one
   optional sign bit and one shape index per sub-vector from bits, then
   writes the corresponding codewords into exc.

   NOTE(review): part of the parameter list (exc, bits, stack, ...), local
   declarations, braces, branch conditions and preprocessor selectors are
   missing from this excerpt. */
522 void split_cb_shape_sign_unquant(
524 const void *par, /* non-overlapping codebook */
525 int nsf, /* number of samples in subframe */
533 const signed char *shape_cb;
534 int shape_cb_size, subvect_size, nb_subvect;
535 const split_cb_params *params;
/* Unpack the codebook geometry; the codebook holds 2^shape_bits entries. */
538 params = (const split_cb_params *) par;
539 subvect_size = params->subvect_size;
540 nb_subvect = params->nb_subvect;
541 shape_cb_size = 1<<params->shape_bits;
542 shape_cb = params->shape_cb;
543 have_sign = params->have_sign;
545 ALLOC(ind, nb_subvect, int);
546 ALLOC(signs, nb_subvect, int);
548 /* Decode codewords and gains */
549 for (i=0;i<nb_subvect;i++)
/* A 1-bit sign is read (presumably only when have_sign is set; the
   condition is not visible), followed by a shape_bits-wide index. */
552 signs[i] = speex_bits_unpack_unsigned(bits, 1);
555 ind[i] = speex_bits_unpack_unsigned(bits, params->shape_bits);
557 /* Compute decoded excitation */
558 for (i=0;i<nb_subvect;i++)
/* Three alternative reconstructions of sub-vector i: codeword shifted
   left by SIG_SHIFT-5, the negation of that, or s*0.03125*codeword
   (presumably the floating-point build).
   NOTE(review): the first two lines assign to exc while the third uses
   += and so accumulates into it -- confirm the difference is intended. */
566 for (j=0;j<subvect_size;j++)
567 exc[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5);
569 for (j=0;j<subvect_size;j++)
570 exc[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5));
573 for (j=0;j<subvect_size;j++)
574 exc[subvect_size*i+j]+=s*0.03125*shape_cb[ind[i]*subvect_size+j];
/* Encoder side of the noise codebook. The visible part allocates an
   nsf-long buffer tmp and fills it by calling residue_percep_zero() on
   target with the three coefficient sets.

   NOTE(review): the rest of the parameter list and everything after the
   residue_percep_zero() call are missing from this excerpt, so what is
   done with tmp afterwards cannot be stated from here. */
579 void noise_codebook_quant(
580 spx_sig_t target[], /* target vector */
581 spx_coef_t ak[], /* LPCs for this subframe */
582 spx_coef_t awk1[], /* Weighted LPCs for this subframe */
583 spx_coef_t awk2[], /* Weighted LPCs for this subframe */
584 const void *par, /* Codebook/search parameters*/
585 int p, /* number of LPC coeffs */
586 int nsf, /* number of samples in subframe */
/* Receives the output of residue_percep_zero(). */
596 VARDECL(spx_sig_t *tmp);
597 ALLOC(tmp, nsf, spx_sig_t);
598 residue_percep_zero(target, ak, awk1, awk2, tmp, nsf, p, stack);
/* Decoder side of the noise codebook: the only visible statement hands exc
   and nsf to speex_rand_vec() with a first argument of 1.

   NOTE(review): speex_rand_vec() is defined elsewhere; presumably it
   writes nsf random samples into exc, with 1 as their scale -- TODO
   confirm. The remaining parameters of this function are not visible in
   this excerpt. */
608 void noise_codebook_unquant(
610 const void *par, /* non-overlapping codebook */
611 int nsf, /* number of samples in subframe */
616 speex_rand_vec(1, exc, nsf);