1 /* Copyright (C) 2002-2006 Jean-Marc Valin
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 #include "quant_lsp.h"
42 #include "cb_search.h"
44 #include "stack_alloc.h"
46 #include "../include/speex/speex_bits.h"
49 #include "math_approx.h"
50 #include "os_support.h"
51 #include "../include/speex/speex_callbacks.h"
54 #include "vorbis_psy.h"
58 #define M_PI 3.14159265358979323846 /* pi */
/* Shorthand for member x of the currently selected sub-mode. The expansion
   refers to a variable named st, so the macro is only usable where such a
   variable (with submodes and submodeID members) is in scope. It dereferences
   st->submodes[st->submodeID] without checking it for NULL. */
65 #define SUBMODE(x) st->submodes[st->submodeID]->x
67 /* Default size for the encoder and decoder stack (can be changed at compile time).
68 This does not apply when using variable-size arrays or alloca. */
70 #define NB_ENC_STACK (8000*sizeof(spx_sig_t))
74 #define NB_DEC_STACK (4000*sizeof(spx_sig_t))
/* 32-entry table searched with scal_quant32() in nb_encode() to quantize the
   open-loop excitation gain (the resulting index is packed on 5 bits). */
79 const spx_word32_t ol_gain_table[32]={18900, 25150, 33468, 44536, 59265, 78865, 104946, 139653, 185838, 247297, 329081, 437913, 582736, 775454, 1031906, 1373169, 1827293, 2431601, 3235761, 4305867, 5729870, 7624808, 10146425, 13501971, 17967238, 23909222, 31816294, 42338330, 56340132, 74972501, 99766822, 132760927};
/* Sub-frame gain quantization tables, integer variant. The *_bound arrays are
   passed to scal_quant() in nb_encode(); the value arrays are indexed by the
   result (8 levels / 3 bits for scal3, 2 levels / 1 bit for scal1).
   NOTE(review): the same identifiers (and LSP_DELTA1/LSP_DELTA2) are defined
   again below with float values; presumably a preprocessor conditional that is
   not visible in this view selects one set -- confirm. */
80 const spx_word16_t exc_gain_quant_scal3_bound[7]={1841, 3883, 6051, 8062, 10444, 13580, 18560};
81 const spx_word16_t exc_gain_quant_scal3[8]={1002, 2680, 5086, 7016, 9108, 11781, 15380, 21740};
82 const spx_word16_t exc_gain_quant_scal1_bound[1]={14385};
83 const spx_word16_t exc_gain_quant_scal1[2]={11546, 17224};
/* LSP_DELTA1 is the step argument given to lpc_to_lsp() in nb_encode(). */
86 #define LSP_DELTA1 6553
87 #define LSP_DELTA2 1638
/* Float variant of the gain quantization tables above. */
91 const float exc_gain_quant_scal3_bound[7]={0.112338f, 0.236980f, 0.369316f, 0.492054f, 0.637471f, 0.828874f, 1.132784f};
92 const float exc_gain_quant_scal3[8]={0.061130f, 0.163546f, 0.310413f, 0.428220f, 0.555887f, 0.719055f, 0.938694f, 1.326874f};
93 const float exc_gain_quant_scal1_bound[1]={0.87798f};
94 const float exc_gain_quant_scal1[2]={0.70469f, 1.05127f};
/* LSP_MARGIN is the margin argument given to lsp_enforce_margin().
   NOTE(review): only this float definition of LSP_MARGIN is visible here. */
96 #define LSP_MARGIN .002f
97 #define LSP_DELTA1 .2f
98 #define LSP_DELTA2 .05f
/* NOTE(review): EXTRA_BUFFER is defined twice (100 and 0); the condition that
   chooses between them is not visible in this view, and no use of the macro
   appears in this part of the file. */
103 #define EXTRA_BUFFER 100
105 #define EXTRA_BUFFER 0
/* Window tables defined elsewhere; nb_encoder_init() stores them in
   st->lagWindow and st->window respectively. */
109 extern const spx_word16_t lag_window[];
110 extern const spx_word16_t lpc_window[];
112 #ifndef DISABLE_ENCODER
/* Create a narrowband encoder state.
   m: mode descriptor; m->mode is read as a const SpeexNBMode* and supplies the
      frame and sub-frame sizes, LPC order, weighting factors, pitch range,
      LPC floor and sub-mode table that are copied into the new state.
   The state is obtained with speex_alloc(sizeof(EncState)). The allocation
   check and the return statement are not visible in this view; presumably the
   new state is returned -- TODO confirm. */
113 void *nb_encoder_init(const SpeexMode *m)
116 const SpeexNBMode *mode;
119 mode=(const SpeexNBMode *)m->mode;
120 st = (EncState*)speex_alloc(sizeof(EncState));
/* NOTE(review): the statement belonging to the VAR_ARRAYS/USE_ALLOCA case and
   the #else are not visible here. nb_encoder_destroy() frees st->stack only
   when neither macro is defined, so the scratch allocation below presumably
   sits in the #else branch -- confirm. */
123 #if defined(VAR_ARRAYS) || defined (USE_ALLOCA)
126 st->stack = (char*)speex_alloc_scratch(NB_ENC_STACK);
/* Copy the codec dimensions and tuning parameters from the mode. */
131 st->frameSize = mode->frameSize;
132 st->nbSubframes=mode->frameSize/mode->subframeSize;
133 st->subframeSize=mode->subframeSize;
134 st->windowSize = NB_WINDOW_SIZE;
135 st->lpcSize = mode->lpcSize;
136 st->gamma1=mode->gamma1;
137 st->gamma2=mode->gamma2;
138 st->min_pitch=mode->pitchStart;
139 st->max_pitch=mode->pitchEnd;
140 st->lpc_floor = mode->lpc_floor;
/* Both the active sub-mode and the selected sub-mode start at the mode's default. */
142 st->submodes=mode->submodes;
143 st->submodeID=st->submodeSelect=mode->defaultSubmode;
144 st->bounded_pitch = 1;
146 st->encode_submode = 1;
/* Psycho-acoustic model buffers; nb_encoder_destroy() releases them.
   NOTE(review): presumably compiled only with VORBIS_PSYCHO (that guard is
   visible around a later use of st->curve, but not here) -- confirm. */
149 st->psy = vorbis_psy_init(8000, 256);
150 st->curve = (float*)speex_alloc(128*sizeof(float));
151 st->old_curve = (float*)speex_alloc(128*sizeof(float));
152 st->psy_window = (float*)speex_alloc(256*sizeof(float));
155 st->cumul_gain = 1024;
157 st->window= lpc_window;
159 /* Create the window for autocorrelation (lag-windowing) */
160 st->lagWindow = lag_window;
/* Seed the previous-frame LSPs with QCONST16(3.1415927f, LSP_SHIFT)*(i+1)/(lpcSize+1).
   The SPEEX_RESET_STATE request in nb_encoder_ctl() uses the same expression. */
163 for (i=0;i<st->lpcSize;i++)
164 st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1);
166 st->innov_rms_save = NULL;
179 #endif /* #ifndef DISABLE_VBR */
183 st->sampling_rate=8000;
185 st->highpass_enabled = 1;
/* NOTE(review): the length given here is NB_ENC_STACK (the scratch-stack size)
   while the pointer is st, the state structure -- confirm this is intended. */
187 #ifdef ENABLE_VALGRIND
188 VALGRIND_MAKE_READABLE(st, NB_ENC_STACK);
/* Release the resources owned by an encoder state.
   state: pointer that is cast to EncState*; no NULL check is visible here. */
193 void nb_encoder_destroy(void *state)
195 EncState *st=(EncState *)state;
196 /* Free all allocated memory */
/* The scratch stack is freed only when neither VAR_ARRAYS nor USE_ALLOCA is defined. */
197 #if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA))
198 speex_free_scratch(st->stack);
202 vbr_destroy(&st->vbr);
203 #endif /* #ifndef DISABLE_VBR */
/* Psycho-acoustic model and the three float buffers allocated in nb_encoder_init(). */
206 vorbis_psy_destroy(st->psy);
207 speex_free (st->curve);
208 speex_free (st->old_curve);
209 speex_free (st->psy_window);
/* NOTE(review): the statement that frees the state itself is not visible in this view. */
212 /*Free state memory... should be last*/
/* Get or set an encoder parameter.
   state:   encoder state (used below as st).
   request: SPEEX_* request code selecting the case.
   ptr:     in/out argument whose pointed-to type depends on the request: most
            cases read or write a spx_int32_t, the VBR quality and relative
            quality cases use float, SPEEX_GET_PI_GAIN writes spx_word32_t
            values, SPEEX_GET_STACK writes a char*, and
            SPEEX_SET_INNOVATION_SAVE stores ptr itself.
   Several case labels, the break statements and the return statement are not
   visible in this view, so the return value is not documented here. */
217 int nb_encoder_ctl(void *state, int request, void *ptr)
223 case SPEEX_GET_FRAME_SIZE:
224 (*(spx_int32_t*)ptr) = st->frameSize;
/* NOTE(review): the value is stored as the sub-mode index without a visible
   range check; it is later used to index st->submodes[] -- confirm callers
   only pass valid indices. */
226 case SPEEX_SET_LOW_MODE:
228 st->submodeSelect = st->submodeID = (*(spx_int32_t*)ptr);
230 case SPEEX_GET_LOW_MODE:
232 (*(spx_int32_t*)ptr) = st->submodeID;
/* Setter/getter pairs for the vbr_enabled, vad_enabled and dtx_enabled flags
   (their case labels are not visible in this view). */
236 st->vbr_enabled = (*(spx_int32_t*)ptr);
239 (*(spx_int32_t*)ptr) = st->vbr_enabled;
242 st->vad_enabled = (*(spx_int32_t*)ptr);
245 (*(spx_int32_t*)ptr) = st->vad_enabled;
248 st->dtx_enabled = (*(spx_int32_t*)ptr);
251 (*(spx_int32_t*)ptr) = st->dtx_enabled;
/* abr_enabled holds the requested value itself; nb_encode() subtracts it from
   the measured bit-rate, so it acts as the target rate. Any non-zero value
   also turns VBR on. */
254 st->abr_enabled = (*(spx_int32_t*)ptr);
255 st->vbr_enabled = st->abr_enabled!=0;
259 spx_int32_t rate, target;
261 target = (*(spx_int32_t*)ptr);
/* Presumably inside a loop over quality values i that compares rate with
   target (the loop is not visible here) -- TODO confirm. */
264 speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
265 speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
273 speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &vbr_qual);
281 (*(spx_int32_t*)ptr) = st->abr_enabled;
283 #endif /* #ifndef DISABLE_VBR */
284 #if !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API)
/* VBR quality is exchanged as a float, unlike most other requests. */
285 case SPEEX_SET_VBR_QUALITY:
286 st->vbr_quality = (*(float*)ptr);
288 case SPEEX_GET_VBR_QUALITY:
289 (*(float*)ptr) = st->vbr_quality;
291 #endif /* !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API) */
/* The quality value indexes the mode's quality_map to pick the sub-mode.
   NOTE(review): any clamping of quality before the lookup is not visible here. */
292 case SPEEX_SET_QUALITY:
294 int quality = (*(spx_int32_t*)ptr);
299 st->submodeSelect = st->submodeID = ((const SpeexNBMode*)(st->mode->mode))->quality_map[quality];
302 case SPEEX_SET_COMPLEXITY:
303 st->complexity = (*(spx_int32_t*)ptr);
304 if (st->complexity<0)
307 case SPEEX_GET_COMPLEXITY:
308 (*(spx_int32_t*)ptr) = st->complexity;
/* Same SET_QUALITY / GET_BITRATE probing as in the ABR case above; the
   surrounding loop is not visible in this view. */
310 case SPEEX_SET_BITRATE:
313 spx_int32_t rate, target;
314 target = (*(spx_int32_t*)ptr);
317 speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
318 speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
/* Bit-rate = sampling_rate * bits-per-frame / frameSize. When the current
   sub-mode entry is NULL, NB_SUBMODE_BITS+1 is used as the frame's bit count
   (the else keyword is presumably on a line not visible here). */
325 case SPEEX_GET_BITRATE:
326 if (st->submodes[st->submodeID])
327 (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
329 (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
331 case SPEEX_SET_SAMPLING_RATE:
332 st->sampling_rate = (*(spx_int32_t*)ptr);
334 case SPEEX_GET_SAMPLING_RATE:
335 (*(spx_int32_t*)ptr)=st->sampling_rate;
/* Restore the start-up values of the LSPs and clear filter memories and
   signal buffers. The body of the last loop is not visible in this view. */
337 case SPEEX_RESET_STATE:
340 st->bounded_pitch = 1;
342 for (i=0;i<st->lpcSize;i++)
343 st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1);
344 for (i=0;i<st->lpcSize;i++)
345 st->mem_sw[i]=st->mem_sw_whole[i]=st->mem_sp[i]=st->mem_exc[i]=0;
346 for (i=0;i<st->frameSize+st->max_pitch+1;i++)
347 st->excBuf[i]=st->swBuf[i]=0;
348 for (i=0;i<st->windowSize-st->frameSize;i++)
352 case SPEEX_SET_SUBMODE_ENCODING:
353 st->encode_submode = (*(spx_int32_t*)ptr);
355 case SPEEX_GET_SUBMODE_ENCODING:
356 (*(spx_int32_t*)ptr) = st->encode_submode;
/* Encoder look-ahead is reported as windowSize - frameSize. */
358 case SPEEX_GET_LOOKAHEAD:
359 (*(spx_int32_t*)ptr)=(st->windowSize-st->frameSize);
361 case SPEEX_SET_PLC_TUNING:
362 st->plc_tuning = (*(spx_int32_t*)ptr);
363 if (st->plc_tuning>100)
366 case SPEEX_GET_PLC_TUNING:
367 (*(spx_int32_t*)ptr)=(st->plc_tuning);
370 case SPEEX_SET_VBR_MAX_BITRATE:
371 st->vbr_max = (*(spx_int32_t*)ptr);
373 case SPEEX_GET_VBR_MAX_BITRATE:
374 (*(spx_int32_t*)ptr) = st->vbr_max;
376 #endif /* #ifndef DISABLE_VBR */
377 case SPEEX_SET_HIGHPASS:
378 st->highpass_enabled = (*(spx_int32_t*)ptr);
380 case SPEEX_GET_HIGHPASS:
381 (*(spx_int32_t*)ptr) = st->highpass_enabled;
384 /* This is all internal stuff past this point */
/* ptr is treated as an array with one entry per sub-frame (the loop body of
   this case is not visible in this view). */
385 case SPEEX_GET_PI_GAIN:
388 spx_word32_t *g = (spx_word32_t*)ptr;
389 for (i=0;i<st->nbSubframes;i++)
/* Writes one RMS value of the excitation per sub-frame into ptr, viewed as
   spx_word16_t[] (case label not visible in this view). */
396 for (i=0;i<st->nbSubframes;i++)
397 ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize);
401 case SPEEX_GET_RELATIVE_QUALITY:
402 (*(float*)ptr)=st->relative_quality;
404 #endif /* #ifndef DISABLE_VBR */
/* The pointer itself is kept; nb_encode() writes one innovation RMS value per
   sub-frame through it when it is non-NULL. */
405 case SPEEX_SET_INNOVATION_SAVE:
406 st->innov_rms_save = (spx_word16_t*)ptr;
408 case SPEEX_SET_WIDEBAND:
409 st->isWideband = *((spx_int32_t*)ptr);
411 case SPEEX_GET_STACK:
412 *((char**)ptr) = st->stack;
/* Reached for unrecognized requests (the default label is presumably on a
   line not visible here). */
415 speex_warning_int("Unknown nb_ctl request: ", request);
/* Encode one frame of narrowband audio.
   state: encoder state (cast to EncState*).
   vin:   input samples, read as spx_word16_t; st->frameSize samples are used.
          When the high-pass filter is enabled the samples are filtered in
          place, so the caller's buffer is modified.
   bits:  bit-stream that receives the packed parameters via speex_bits_pack().
   Many lines of this function (braces, several conditionals, preprocessor
   directives and the return statements) are not visible in this view, so the
   return value is not documented here. Where two near-identical statements
   appear back to back below, they are presumably alternative preprocessor
   branches (fixed-point vs float) -- TODO confirm against the full file. */
422 int nb_encode(void *state, void *vin, SpeexBits *bits)
427 spx_word16_t ol_pitch_coef;
428 spx_word32_t ol_gain;
429 VARDECL(spx_word16_t *ringing);
430 VARDECL(spx_word16_t *target);
431 VARDECL(spx_sig_t *innov);
432 VARDECL(spx_word32_t *exc32);
433 VARDECL(spx_mem_t *mem);
434 VARDECL(spx_coef_t *bw_lpc1);
435 VARDECL(spx_coef_t *bw_lpc2);
436 VARDECL(spx_coef_t *lpc);
437 VARDECL(spx_lsp_t *lsp);
438 VARDECL(spx_lsp_t *qlsp);
439 VARDECL(spx_lsp_t *interp_lsp);
440 VARDECL(spx_lsp_t *interp_qlsp);
441 VARDECL(spx_coef_t *interp_lpc);
442 VARDECL(spx_coef_t *interp_qlpc);
444 VARDECL(spx_word16_t *syn_resp);
445 VARDECL(spx_word16_t *real_exc);
448 spx_word16_t fine_gain;
449 spx_word16_t *in = (spx_word16_t*)vin;
451 st=(EncState *)state;
/* Per-frame scratch arrays, each lpcSize entries long. */
454 ALLOC(lpc, st->lpcSize, spx_coef_t);
455 ALLOC(bw_lpc1, st->lpcSize, spx_coef_t);
456 ALLOC(bw_lpc2, st->lpcSize, spx_coef_t);
457 ALLOC(lsp, st->lpcSize, spx_lsp_t);
458 ALLOC(qlsp, st->lpcSize, spx_lsp_t);
459 ALLOC(interp_lsp, st->lpcSize, spx_lsp_t);
460 ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t);
461 ALLOC(interp_lpc, st->lpcSize, spx_coef_t);
462 ALLOC(interp_qlpc, st->lpcSize, spx_coef_t);
/* st->exc and st->sw point max_pitch+2 entries into their backing buffers, so
   negative indices reach past samples. */
464 st->exc = st->excBuf + st->max_pitch + 2;
465 st->sw = st->swBuf + st->max_pitch + 2;
466 /* Move signals 1 frame towards the past */
467 SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, st->max_pitch+2);
468 SPEEX_MOVE(st->swBuf, st->swBuf+st->frameSize, st->max_pitch+2);
/* Optional input high-pass, applied in place on the caller's samples; the
   filter variant depends on st->isWideband. */
470 if (st->highpass_enabled)
471 highpass(in, in, st->frameSize, (st->isWideband?HIGHPASS_WIDEBAND:HIGHPASS_NARROWBAND)|HIGHPASS_INPUT, st->mem_hp);
474 VARDECL(spx_word16_t *w_sig);
475 VARDECL(spx_word16_t *autocorr);
476 ALLOC(w_sig, st->windowSize, spx_word16_t);
477 ALLOC(autocorr, st->lpcSize+1, spx_word16_t);
478 /* Window for analysis */
/* The first windowSize-frameSize windowed samples come from st->winBuf (saved
   at the end of the previous call), the remainder from the start of in. */
479 for (i=0;i<st->windowSize-st->frameSize;i++)
480 w_sig[i] = EXTRACT16(SHR32(MULT16_16(st->winBuf[i],st->window[i]),SIG_SHIFT));
481 for (;i<st->windowSize;i++)
482 w_sig[i] = EXTRACT16(SHR32(MULT16_16(in[i-st->windowSize+st->frameSize],st->window[i]),SIG_SHIFT));
483 /* Compute auto-correlation */
484 _spx_autocorr(w_sig, autocorr, st->lpcSize+1, st->windowSize);
485 autocorr[0] = ADD16(autocorr[0],MULT16_16_Q15(autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */
487 /* Lag windowing: equivalent to filtering in the power-spectrum domain */
488 for (i=0;i<st->lpcSize+1;i++)
489 autocorr[i] = MULT16_16_Q14(autocorr[i],st->lagWindow[i]);
491 /* Levinson-Durbin */
492 _spx_lpc(lpc, autocorr, st->lpcSize);
493 /* LPC to LSPs (x-domain) transform */
494 roots=lpc_to_lsp (lpc, st->lpcSize, lsp, 10, LSP_DELTA1, stack);
495 /* Check if we found all the roots */
496 if (roots!=st->lpcSize)
498 /*If we can't find all LSP's, do some damage control and use previous filter*/
499 for (i=0;i<st->lpcSize;i++)
501 lsp[i]=st->old_lsp[i];
509 /* Whole frame analysis (open-loop estimation of pitch and excitation gain) */
/* diff is the number of look-ahead samples carried over in st->winBuf. */
511 int diff = st->windowSize-st->frameSize;
/* NOTE(review): the condition choosing between the plain copy and the
   interpolation below is not visible in this view. */
513 for (i=0;i<st->lpcSize;i++)
514 interp_lsp[i] = lsp[i];
516 lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, st->nbSubframes, st->nbSubframes<<1);
518 lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN);
520 /* Compute interpolated LPCs (unquantized) for whole frame*/
521 lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack);
/* Conditions under which the open-loop pitch search below is run. */
525 if (!st->submodes[st->submodeID] || (st->complexity>2 && SUBMODE(have_subframe_gain)<3) || SUBMODE(forced_pitch_gain) || SUBMODE(lbr_pitch) != -1
527 || st->vbr_enabled || st->vad_enabled
532 spx_word16_t nol_pitch_coef[6];
534 bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize);
535 bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize);
/* Build the frame to analyse in st->sw (diff samples from winBuf followed by
   frameSize-diff samples from in) and filter it in place. */
537 SPEEX_COPY(st->sw, st->winBuf, diff);
538 SPEEX_COPY(st->sw+diff, in, st->frameSize-diff);
539 filter_mem16(st->sw, bw_lpc1, bw_lpc2, st->sw, st->frameSize, st->lpcSize, st->mem_sw_whole, stack);
/* Ask for 6 pitch candidates; the first one is the initial choice. */
541 open_loop_nbest_pitch(st->sw, st->min_pitch, st->max_pitch, st->frameSize,
542 nol_pitch, nol_pitch_coef, 6, stack);
543 ol_pitch=nol_pitch[0];
544 ol_pitch_coef = nol_pitch_coef[0];
545 /*Try to remove pitch multiples*/
/* A later candidate replaces ol_pitch when its coefficient exceeds 0.85 times
   the best one (27853/32768 is about 0.85) and 2x to 5x its lag falls within
   a small tolerance of ol_pitch. The two "if" lines are the integer and float
   forms of the same test. */
549 if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],27853)) &&
551 if ((nol_pitch_coef[i]>.85*nol_pitch_coef[0]) &&
553 (ABS(2*nol_pitch[i]-ol_pitch)<=2 || ABS(3*nol_pitch[i]-ol_pitch)<=3 ||
554 ABS(4*nol_pitch[i]-ol_pitch)<=4 || ABS(5*nol_pitch[i]-ol_pitch)<=5))
556 /*ol_pitch_coef=nol_pitch_coef[i];*/
557 ol_pitch = nol_pitch[i];
562 /*ol_pitch_coef = sqrt(ol_pitch_coef);*/
569 /*Compute "real" excitation*/
570 SPEEX_COPY(st->exc, st->winBuf, diff);
571 SPEEX_COPY(st->exc+diff, in, st->frameSize-diff);
572 fir_mem16(st->exc, interp_lpc, st->exc, st->frameSize, st->lpcSize, st->mem_exc, stack);
574 /* Compute open-loop excitation gain */
/* g is the RMS of the excitation over the frame. When the sub-mode is not 1
   and a pitch was found, the gain is g scaled by 1.1*sqrt(1 - .8*coef^2);
   the other assignment shifts g up by SIG_SHIFT (its selecting else/#if is
   not visible in this view). */
576 spx_word16_t g = compute_rms16(st->exc, st->frameSize);
577 if (st->submodeID!=1 && ol_pitch>0)
578 ol_gain = MULT16_16(g, MULT16_16_Q14(QCONST16(1.1,14),
579 spx_sqrt(QCONST32(1.,28)-MULT16_32_Q15(QCONST16(.8,15),SHL32(MULT16_16(ol_pitch_coef,ol_pitch_coef),16)))));
581 ol_gain = SHL32(EXTEND32(g),SIG_SHIFT);
/* Psycho-acoustic path: slide the 256-sample window by one frame, append the
   new input and recompute the masking curve. */
586 SPEEX_MOVE(st->psy_window, st->psy_window+st->frameSize, 256-st->frameSize);
587 SPEEX_COPY(&st->psy_window[256-st->frameSize], in, st->frameSize);
588 compute_curve(st->psy, st->psy_window, st->curve);
589 /*print_vec(st->curve, 128, "curve");*/
591 SPEEX_COPY(st->old_curve, st->curve, 128);
/* VBR / VAD decision. lsp_dist is the squared distance between the previous
   and current LSPs, normalized by LSP_SCALING squared. */
596 if (st->vbr_enabled||st->vad_enabled)
599 for (i=0;i<st->lpcSize;i++)
600 lsp_dist += (st->old_lsp[i] - lsp[i])*(st->old_lsp[i] - lsp[i]);
601 lsp_dist /= LSP_SCALING*LSP_SCALING;
/* ABR: nudge vbr_quality against the accumulated bit-rate drift; the clamp
   assignments that follow the comparisons are not visible in this view. */
606 if (st->abr_drift2 * st->abr_drift > 0)
608 /* Only adapt if long-term and short-term drift are the same sign */
609 qual_change = -.00001*st->abr_drift/(1+st->abr_count);
612 if (qual_change<-.05)
615 st->vbr_quality += qual_change;
616 if (st->vbr_quality>10)
618 if (st->vbr_quality<0)
622 st->relative_quality = vbr_analysis(&st->vbr, in, st->frameSize, ol_pitch, GAIN_SCALING_1*ol_pitch_coef);
623 /*if (delta_qual<0)*/
624 /* delta_qual*=.1*(3+st->vbr_quality);*/
/* The threshold for each candidate mode is interpolated linearly between the
   table entries at floor(vbr_quality) and the next one. */
635 v1=(int)floor(st->vbr_quality);
637 thresh = vbr_nb_thresh[mode][v1];
639 thresh = (st->vbr_quality-v1)*vbr_nb_thresh[mode][v1+1] + (1+v1-st->vbr_quality)*vbr_nb_thresh[mode][v1];
640 if (st->relative_quality > thresh &&
641 st->relative_quality-thresh<min_diff)
644 min_diff = st->relative_quality-thresh;
651 if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
663 speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);
/* Cap the resulting bit-rate at st->vbr_max. */
667 speex_encoder_ctl(state, SPEEX_GET_BITRATE, &rate);
668 if (rate > st->vbr_max)
671 speex_encoder_ctl(state, SPEEX_SET_BITRATE, &rate);
/* ABR bookkeeping: abr_drift accumulates (bitrate - abr_enabled), abr_drift2
   is a smoothed version of it (weights .95 / .05), abr_count counts frames. */
678 speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate);
679 st->abr_drift+=(bitrate-st->abr_enabled);
680 st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled);
681 st->abr_count += 1.0;
687 if (st->relative_quality<2)
689 if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
699 mode=st->submodeSelect;
701 /*speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);*/
705 st->relative_quality = -1;
707 #endif /* #ifndef DISABLE_VBR */
709 if (st->encode_submode)
711 /* First, transmit a zero for narrowband */
712 speex_bits_pack(bits, 0, 1);
714 /* Transmit the sub-mode we use for this frame */
715 speex_bits_pack(bits, st->submodeID, NB_SUBMODE_BITS);
719 /* If null mode (no transmission), just set a couple things to zero*/
720 if (st->submodes[st->submodeID] == NULL)
722 for (i=0;i<st->frameSize;i++)
723 st->exc[i]=st->sw[i]=VERY_SMALL;
725 for (i=0;i<st->lpcSize;i++)
728 st->bounded_pitch = 1;
/* Save the last windowSize-frameSize input samples for the next call's
   analysis window. */
730 SPEEX_COPY(st->winBuf, in+2*st->frameSize-st->windowSize, st->windowSize-st->frameSize);
732 /* Clear memory (no need to really compute it) */
733 for (i=0;i<st->lpcSize;i++)
739 /* LSP Quantization */
742 for (i=0;i<st->lpcSize;i++)
743 st->old_lsp[i] = lsp[i];
748 #if 1 /*0 for unquantized*/
749 SUBMODE(lsp_quant)(lsp, qlsp, st->lpcSize, bits);
751 for (i=0;i<st->lpcSize;i++)
755 /*If we use low bit-rate pitch mode, transmit open-loop pitch*/
/* Sent on 7 bits as an offset from min_pitch. */
756 if (SUBMODE(lbr_pitch)!=-1)
758 speex_bits_pack(bits, ol_pitch-st->min_pitch, 7);
/* Forced pitch gain: quantize ol_pitch_coef to a 4-bit index, then replace it
   with the de-quantized value. */
761 if (SUBMODE(forced_pitch_gain))
764 /* This just damps the pitch a bit, because it tends to be too aggressive when forced */
765 ol_pitch_coef = MULT16_16_Q15(QCONST16(.9,15), ol_pitch_coef);
767 quant = PSHR16(MULT16_16_16(15, ol_pitch_coef),GAIN_SHIFT);
769 quant = (int)floor(.5+15*ol_pitch_coef*GAIN_SCALING_1);
775 speex_bits_pack(bits, quant, 4);
776 ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT));
780 /*Quantize and transmit open-loop excitation gain*/
/* Table-based variant: 5-bit index into ol_gain_table. */
783 int qe = scal_quant32(ol_gain, ol_gain_table, 32);
784 /*ol_gain = exp(qe/3.5)*SIG_SCALING;*/
785 ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]);
786 speex_bits_pack(bits, qe, 5);
/* Logarithmic variant of the same quantization (any clamping of qe is not
   visible in this view). */
790 int qe = (int)(floor(.5+3.5*log(ol_gain*1.0/SIG_SCALING)));
795 ol_gain = exp(qe/3.5)*SIG_SCALING;
796 speex_bits_pack(bits, qe, 5);
802 /* Special case for first frame */
805 for (i=0;i<st->lpcSize;i++)
806 st->old_qlsp[i] = qlsp[i];
/* Per-sub-frame scratch arrays. */
810 ALLOC(target, st->subframeSize, spx_word16_t);
811 ALLOC(innov, st->subframeSize, spx_sig_t);
812 ALLOC(exc32, st->subframeSize, spx_word32_t);
813 ALLOC(ringing, st->subframeSize, spx_word16_t);
814 ALLOC(syn_resp, st->subframeSize, spx_word16_t);
815 ALLOC(real_exc, st->subframeSize, spx_word16_t);
816 ALLOC(mem, st->lpcSize, spx_mem_t);
818 /* Loop on sub-frames */
819 for (sub=0;sub<st->nbSubframes;sub++)
825 int response_bound = st->subframeSize;
827 /* Offset relative to start of frame */
828 offset = st->subframeSize*sub;
831 /* Weighted signal */
834 /* LSP interpolation (quantized and unquantized) */
835 lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, sub, st->nbSubframes);
836 lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes);
838 /* Make sure the filters are stable */
839 lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN);
840 lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN);
842 /* Compute interpolated LPCs (quantized and unquantized) */
843 lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack);
845 lsp_to_lpc(interp_qlsp, interp_qlpc, st->lpcSize, stack);
847 /* Compute analysis filter gain at w=pi (for use in SB-CELP) */
/* The coefficients are read in pairs (i, i+1); the result is stored per
   sub-frame in st->pi_gain[]. */
849 spx_word32_t pi_g=LPC_SCALING;
850 for (i=0;i<st->lpcSize;i+=2)
852 /*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/
853 pi_g = ADD32(pi_g, SUB32(EXTEND32(interp_qlpc[i+1]),EXTEND32(interp_qlpc[i])));
855 st->pi_gain[sub] = pi_g;
/* Psycho-acoustic variant: blend the previous and current masking curves
   according to the sub-frame position and derive the weighting filters from
   the blend. The bw_lpc() calls further down are the default variant. */
860 float curr_curve[128];
861 float fact = ((float)sub+1.0f)/st->nbSubframes;
863 curr_curve[i] = (1.0f-fact)*st->old_curve[i] + fact*st->curve[i];
864 curve_to_lpc(st->psy, curr_curve, bw_lpc1, bw_lpc2, 10);
867 /* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */
868 bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize);
869 bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize);
870 /*print_vec(st->bw_lpc1, 10, "bw_lpc");*/
873 /*FIXME: This will break if we change the window size */
874 speex_assert(st->windowSize-st->frameSize == st->subframeSize);
/* NOTE(review): for sub==0 this index would be negative; the branch that
   picks a different source for the first sub-frame is presumably among the
   lines not visible in this view -- confirm. */
880 buf = &in[((sub-1)*st->subframeSize)];
881 for (i=0;i<st->subframeSize;i++)
882 real_exc[i] = sw[i] = buf[i];
884 fir_mem16(real_exc, interp_qlpc, real_exc, st->subframeSize, st->lpcSize, st->mem_exc2, stack);
/* At complexity 0 only the first half of the impulse response is computed;
   the remainder is filled with VERY_SMALL. */
886 if (st->complexity==0)
887 response_bound >>= 1;
888 compute_impulse_response(interp_qlpc, bw_lpc1, bw_lpc2, syn_resp, response_bound, st->lpcSize, stack);
889 for (i=response_bound;i<st->subframeSize;i++)
890 syn_resp[i]=VERY_SMALL;
892 /* Compute zero response of A(z/g1) / ( A(z/g2) * A(z) ) */
893 for (i=0;i<st->lpcSize;i++)
894 mem[i]=SHL32(st->mem_sp[i],1);
895 for (i=0;i<st->subframeSize;i++)
896 ringing[i] = VERY_SMALL;
/* NOTE(review): two alternative sequences follow, one limited to
   response_bound samples and one covering the whole sub-frame; the directive
   selecting between them is not visible here. The first one passes
   st->bw_lpc1 / st->bw_lpc2, whereas the second uses the local bw_lpc1 /
   bw_lpc2 arrays -- confirm which is compiled and whether the state members
   exist. */
898 iir_mem16(ringing, interp_qlpc, ringing, response_bound, st->lpcSize, mem, stack);
899 for (i=0;i<st->lpcSize;i++)
900 mem[i]=SHL32(st->mem_sw[i],1);
901 filter_mem16(ringing, st->bw_lpc1, st->bw_lpc2, ringing, response_bound, st->lpcSize, mem, stack);
902 SPEEX_MEMSET(&ringing[response_bound], 0, st->subframeSize-response_bound);
904 iir_mem16(ringing, interp_qlpc, ringing, st->subframeSize, st->lpcSize, mem, stack);
905 for (i=0;i<st->lpcSize;i++)
906 mem[i]=SHL32(st->mem_sw[i],1);
907 filter_mem16(ringing, bw_lpc1, bw_lpc2, ringing, st->subframeSize, st->lpcSize, mem, stack);
910 /* Compute weighted signal */
/* Filtering runs on a copy of the weighting-filter memory; the copy is written
   back to st->mem_sw only at complexity 0. At other complexities st->mem_sw is
   updated at the end of the sub-frame loop instead. */
911 for (i=0;i<st->lpcSize;i++)
912 mem[i]=st->mem_sw[i];
913 filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, mem, stack);
915 if (st->complexity==0)
916 for (i=0;i<st->lpcSize;i++)
917 st->mem_sw[i]=mem[i];
919 /* Compute target signal (saturation prevents overflows on clipped input speech) */
920 for (i=0;i<st->subframeSize;i++)
921 target[i]=EXTRACT16(SATURATE(SUB32(sw[i],PSHR32(ringing[i],1)),32767));
923 /* Reset excitation */
924 SPEEX_MEMSET(exc, 0, st->subframeSize);
926 /* If we have a long-term predictor (otherwise, something's wrong) */
927 speex_assert (SUBMODE(ltp_quant));
929 int pit_min, pit_max;
930 /* Long-term prediction */
931 if (SUBMODE(lbr_pitch) != -1)
933 /* Low bit-rate pitch handling */
/* The closed-loop search is restricted to a window of width derived from
   margin around the open-loop pitch, after ol_pitch has been clamped so that
   the window stays inside [min_pitch, max_pitch]. */
935 margin = SUBMODE(lbr_pitch);
938 if (ol_pitch < st->min_pitch+margin-1)
939 ol_pitch=st->min_pitch+margin-1;
940 if (ol_pitch > st->max_pitch-margin)
941 ol_pitch=st->max_pitch-margin;
942 pit_min = ol_pitch-margin+1;
943 pit_max = ol_pitch+margin;
945 pit_min=pit_max=ol_pitch;
/* Otherwise the full pitch range is searched. */
948 pit_min = st->min_pitch;
949 pit_max = st->max_pitch;
952 /* Force pitch to use only the current frame if needed */
953 if (st->bounded_pitch && pit_max>offset)
956 /* Perform pitch search */
957 pitch = SUBMODE(ltp_quant)(target, sw, interp_qlpc, bw_lpc1, bw_lpc2,
958 exc32, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
959 st->lpcSize, st->subframeSize, bits, stack,
960 exc, syn_resp, st->complexity, 0, st->plc_tuning, &st->cumul_gain);
962 st->pitch[sub]=pitch;
964 /* Quantization of innovation */
965 SPEEX_MEMSET(innov, 0, st->subframeSize);
967 /* FIXME: Make sure this is safe from overflows (so far so good) */
/* Subtract the pitch contribution (exc32) from the real excitation; the RMS
   of what remains, shifted up by SIG_SHIFT, is the sub-frame energy ener. */
968 for (i=0;i<st->subframeSize;i++)
969 real_exc[i] = EXTRACT16(SUB32(EXTEND32(real_exc[i]), PSHR32(exc32[i],SIG_SHIFT-1)));
971 ener = SHL32(EXTEND32(compute_rms16(real_exc, st->subframeSize)),SIG_SHIFT);
973 /*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */
/* fine_gain is the ratio of ener to the open-loop gain; two alternative
   computations are visible (their selecting directive is not). */
976 spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT));
983 fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT));
985 /* Calculate gain correction for the sub-frame (if any) */
/* have_subframe_gain==3 uses the 8-level table (3 bits); the other visible
   path uses the 2-level table (1 bit). In both, ener is replaced by the
   quantized gain times ol_gain. */
986 if (SUBMODE(have_subframe_gain))
989 if (SUBMODE(have_subframe_gain)==3)
991 qe = scal_quant(fine_gain, exc_gain_quant_scal3_bound, 8);
992 speex_bits_pack(bits, qe, 3);
993 ener=MULT16_32_Q14(exc_gain_quant_scal3[qe],ol_gain);
995 qe = scal_quant(fine_gain, exc_gain_quant_scal1_bound, 2);
996 speex_bits_pack(bits, qe, 1);
997 ener=MULT16_32_Q14(exc_gain_quant_scal1[qe],ol_gain);
1003 /*printf ("%f %f\n", ener, ol_gain);*/
1005 /* Normalize innovation */
1006 signal_div(target, target, ener, st->subframeSize);
1008 /* Quantize innovation */
1009 speex_assert (SUBMODE(innovation_quant));
1011 /* Codebook search */
1012 SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
1013 SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
1014 innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook));
1016 /* De-normalize innovation and update excitation */
1017 signal_mul(innov, innov, ener, st->subframeSize);
1019 for (i=0;i<st->subframeSize;i++)
1020 exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
1022 /* In some (rare) modes, we do a second search (more bits) to reduce noise even more */
/* The target is scaled by 2.2 for the second search and the resulting
   innovation is scaled by ener*0.454545 (about 1/2.2) before being added to
   the first one. */
1023 if (SUBMODE(double_codebook)) {
1024 char *tmp_stack=stack;
1025 VARDECL(spx_sig_t *innov2);
1026 ALLOC(innov2, st->subframeSize, spx_sig_t);
1027 SPEEX_MEMSET(innov2, 0, st->subframeSize);
1028 for (i=0;i<st->subframeSize;i++)
1029 target[i]=MULT16_16_P13(QCONST16(2.2f,13), target[i]);
1030 SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
1031 SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
1032 innov2, syn_resp, bits, stack, st->complexity, 0);
1033 signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize);
1034 for (i=0;i<st->subframeSize;i++)
1035 innov[i] = ADD32(innov[i],innov2[i]);
/* Recompute the excitation from the pitch contribution and the (possibly
   combined) innovation. */
1038 for (i=0;i<st->subframeSize;i++)
1039 exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
/* Export the innovation RMS for this sub-frame when a save buffer was
   registered through SPEEX_SET_INNOVATION_SAVE. */
1040 #ifndef DISABLE_WIDEBAND
1041 if (st->innov_rms_save)
1042 st->innov_rms_save[sub] = compute_rms(innov, st->subframeSize);
1046 /* Final signal synthesis from excitation */
1047 iir_mem16(exc, interp_qlpc, sw, st->subframeSize, st->lpcSize, st->mem_sp, stack);
1049 /* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */
1050 if (st->complexity!=0)
1051 filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw, stack);
1055 /* Store the LSPs for interpolation in the next frame */
1056 if (st->submodeID>=1)
1058 for (i=0;i<st->lpcSize;i++)
1059 st->old_lsp[i] = lsp[i];
1060 for (i=0;i<st->lpcSize;i++)
1061 st->old_qlsp[i] = qlsp[i];
1064 #ifdef VORBIS_PSYCHO
1065 if (st->submodeID>=1)
1066 SPEEX_COPY(st->old_curve, st->curve, 128);
/* In sub-mode 1 a 4-bit field is appended, with value 15 or 0; the condition
   choosing between the two is not visible in this view. */
1069 if (st->submodeID==1)
1073 speex_bits_pack(bits, 15, 4);
1076 speex_bits_pack(bits, 0, 4);
1079 /* The next frame will not be the first (Duh!) */
/* Save the last windowSize-frameSize input samples for the next call's
   analysis window. */
1081 SPEEX_COPY(st->winBuf, in+2*st->frameSize-st->windowSize, st->windowSize-st->frameSize);
/* Pitch stays bounded for the next frame after a noise-codebook frame or
   sub-mode 0; the second assignment is presumably the else branch. */
1083 if (SUBMODE(innovation_quant) == noise_codebook_quant || st->submodeID==0)
1084 st->bounded_pitch = 1;
1086 st->bounded_pitch = 0;
1090 #endif /* DISABLE_ENCODER */
1093 #ifndef DISABLE_DECODER
/* Create a narrowband decoder state.
   m: mode descriptor; m->mode is read as a const SpeexNBMode* and supplies the
      frame and sub-frame sizes, LPC order, pitch range and sub-mode table.
   The state is obtained with speex_alloc(sizeof(DecState)). The allocation
   check and the return statement are not visible in this view; presumably the
   new state is returned -- TODO confirm. */
1094 void *nb_decoder_init(const SpeexMode *m)
1097 const SpeexNBMode *mode;
1100 mode=(const SpeexNBMode*)m->mode;
1101 st = (DecState *)speex_alloc(sizeof(DecState));
/* NOTE(review): the statement for the VAR_ARRAYS/USE_ALLOCA case and the #else
   are not visible here; nb_decoder_destroy() frees st->stack only when neither
   macro is defined, so the allocation below presumably sits in the #else
   branch -- confirm. */
1104 #if defined(VAR_ARRAYS) || defined (USE_ALLOCA)
1107 st->stack = (char*)speex_alloc_scratch(NB_DEC_STACK);
1113 st->encode_submode = 1;
1116 /* Codec parameters, should eventually have several "modes"*/
1117 st->frameSize = mode->frameSize;
1118 st->nbSubframes=mode->frameSize/mode->subframeSize;
1119 st->subframeSize=mode->subframeSize;
1120 st->lpcSize = mode->lpcSize;
1121 st->min_pitch=mode->pitchStart;
1122 st->max_pitch=mode->pitchEnd;
1124 st->submodes=mode->submodes;
1125 st->submodeID=mode->defaultSubmode;
1127 st->lpc_enh_enabled=1;
/* Clear the first frameSize + max_pitch entries of the excitation buffer. */
1129 SPEEX_MEMSET(st->excBuf, 0, st->frameSize + st->max_pitch);
/* Start-up values for the state that nb_decode_lost() reads (last pitch and
   the three-entry pitch-gain history). */
1131 st->last_pitch = 40;
1133 st->pitch_gain_buf[0] = st->pitch_gain_buf[1] = st->pitch_gain_buf[2] = 0;
1134 st->pitch_gain_buf_idx = 0;
1137 st->sampling_rate=8000;
1138 st->last_ol_gain = 0;
/* Default user handler; the per-id callback table entries are set to NULL
   (the loop header around that assignment is not visible in this view). */
1140 st->user_callback.func = &speex_default_user_handler;
1141 st->user_callback.data = NULL;
1143 st->speex_callbacks[i].func = NULL;
1145 st->voc_m1=st->voc_m2=st->voc_mean=0;
1149 st->highpass_enabled = 1;
/* NOTE(review): the length given here is NB_DEC_STACK (the scratch-stack size)
   while the pointer is st, the state structure -- confirm this is intended. */
1151 #ifdef ENABLE_VALGRIND
1152 VALGRIND_MAKE_READABLE(st, NB_DEC_STACK);
/* Release the resources owned by a decoder state.
   state: pointer that is cast to DecState*; no NULL check is visible here.
   The scratch stack is freed only when neither VAR_ARRAYS nor USE_ALLOCA is
   defined. The statement that frees the state itself is not visible in this
   view. */
1157 void nb_decoder_destroy(void *state)
1160 st=(DecState*)state;
1162 #if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA))
1163 speex_free_scratch(st->stack);
/* Get or set a decoder parameter.
   state:   decoder state (cast to DecState*).
   request: SPEEX_* request code selecting the case.
   ptr:     in/out argument whose pointed-to type depends on the request: most
            cases read or write a spx_int32_t, the handler cases read a
            SpeexCallback, SPEEX_GET_PI_GAIN writes spx_word32_t values,
            SPEEX_GET_STACK writes a char*, and SPEEX_SET_INNOVATION_SAVE
            stores ptr itself.
   Several case labels, the break statements and the return statement are not
   visible in this view, so the return value is not documented here. */
1169 int nb_decoder_ctl(void *state, int request, void *ptr)
1172 st=(DecState*)state;
/* NOTE(review): the value is stored as the sub-mode index without a visible
   range check; it is later used to index st->submodes[]. */
1175 case SPEEX_SET_LOW_MODE:
1176 case SPEEX_SET_MODE:
1177 st->submodeID = (*(spx_int32_t*)ptr);
1179 case SPEEX_GET_LOW_MODE:
1180 case SPEEX_GET_MODE:
1181 (*(spx_int32_t*)ptr) = st->submodeID;
/* Setter and getter for lpc_enh_enabled (case labels not visible in this view). */
1184 st->lpc_enh_enabled = *((spx_int32_t*)ptr);
1187 *((spx_int32_t*)ptr) = st->lpc_enh_enabled;
1189 case SPEEX_GET_FRAME_SIZE:
1190 (*(spx_int32_t*)ptr) = st->frameSize;
/* Bit-rate = sampling_rate * bits-per-frame / frameSize. When the current
   sub-mode entry is NULL, NB_SUBMODE_BITS+1 is used as the frame's bit count
   (the else keyword is presumably on a line not visible here). */
1192 case SPEEX_GET_BITRATE:
1193 if (st->submodes[st->submodeID])
1194 (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
1196 (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
1198 case SPEEX_SET_SAMPLING_RATE:
1199 st->sampling_rate = (*(spx_int32_t*)ptr);
1201 case SPEEX_GET_SAMPLING_RATE:
1202 (*(spx_int32_t*)ptr)=st->sampling_rate;
/* Register an in-band callback in the slot selected by c->callback_id.
   NOTE(review): callback_id is used directly as an index into
   st->speex_callbacks[] with no range check in this case; a value outside the
   array would write out of bounds -- confirm that callers are trusted or add
   a check. */
1204 case SPEEX_SET_HANDLER:
1206 SpeexCallback *c = (SpeexCallback*)ptr;
1207 st->speex_callbacks[c->callback_id].func=c->func;
1208 st->speex_callbacks[c->callback_id].data=c->data;
1209 st->speex_callbacks[c->callback_id].callback_id=c->callback_id;
/* Replace the single user callback with a copy of the caller's descriptor. */
1212 case SPEEX_SET_USER_HANDLER:
1214 SpeexCallback *c = (SpeexCallback*)ptr;
1215 st->user_callback.func=c->func;
1216 st->user_callback.data=c->data;
1217 st->user_callback.callback_id=c->callback_id;
/* The bodies of the two reset loops are not visible in this view. */
1220 case SPEEX_RESET_STATE:
1223 for (i=0;i<st->lpcSize;i++)
1225 for (i=0;i<st->frameSize + st->max_pitch + 1;i++)
1229 case SPEEX_SET_SUBMODE_ENCODING:
1230 st->encode_submode = (*(spx_int32_t*)ptr);
1232 case SPEEX_GET_SUBMODE_ENCODING:
1233 (*(spx_int32_t*)ptr) = st->encode_submode;
/* Decoder look-ahead is reported as one sub-frame. */
1235 case SPEEX_GET_LOOKAHEAD:
1236 (*(spx_int32_t*)ptr)=st->subframeSize;
1238 case SPEEX_SET_HIGHPASS:
1239 st->highpass_enabled = (*(spx_int32_t*)ptr);
1241 case SPEEX_GET_HIGHPASS:
1242 (*(spx_int32_t*)ptr) = st->highpass_enabled;
1244 /* FIXME: Convert to fixed-point and re-enable even when float API is disabled */
1245 #ifndef DISABLE_FLOAT_API
/* Activity is log(level/min_level) divided by log(max_level/min_level),
   reported as an integer after multiplying by 100. The range handling between
   these statements is not visible in this view. */
1246 case SPEEX_GET_ACTIVITY:
1249 ret = log(st->level/st->min_level)/log(st->max_level/st->min_level);
1252 /* Done in a strange way to catch NaNs as well */
1255 /*printf ("%f %f %f %f\n", st->level, st->min_level, st->max_level, ret);*/
1256 (*(spx_int32_t*)ptr) = (int)(100*ret);
/* Copies one st->pi_gain[] value per sub-frame into the caller's array. */
1260 case SPEEX_GET_PI_GAIN:
1263 spx_word32_t *g = (spx_word32_t*)ptr;
1264 for (i=0;i<st->nbSubframes;i++)
1265 g[i]=st->pi_gain[i];
/* Writes one RMS value of the excitation per sub-frame into ptr, viewed as
   spx_word16_t[] (case label not visible in this view). */
1271 for (i=0;i<st->nbSubframes;i++)
1272 ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize);
1275 case SPEEX_GET_DTX_STATUS:
1276 *((spx_int32_t*)ptr) = st->dtx_enabled;
/* The pointer itself is kept in st->innov_save. */
1278 case SPEEX_SET_INNOVATION_SAVE:
1279 st->innov_save = (spx_word16_t*)ptr;
1281 case SPEEX_SET_WIDEBAND:
1282 st->isWideband = *((spx_int32_t*)ptr);
1284 case SPEEX_GET_STACK:
1285 *((char**)ptr) = st->stack;
/* Reached for unrecognized requests (the default label is presumably on a
   line not visible here). */
1288 speex_warning_int("Unknown nb_ctl request: ", request);
/* Median of three values, using only < comparisons. The arguments are
   evaluated more than once, so they must not have side effects. */
1295 #define median3(a, b, c) ((a) < (b) ? ((b) < (c) ? (b) : ((a) < (c) ? (c) : (a))) : ((c) < (b) ? (b) : ((c) < (a) ? (c) : (a))))
/* Attenuation factors indexed by st->count_lost in nb_decode_lost() when
   count_lost is below 10.
   NOTE(review): the table is defined twice, once with integer values and once
   with fractional values that are approximately the integer ones divided by
   32768; presumably a preprocessor conditional that is not visible in this
   view selects one -- confirm. */
1298 const spx_word16_t attenuation[10] = {32767, 31483, 27923, 22861, 17278, 12055, 7764, 4616, 2533, 1283};
1300 const spx_word16_t attenuation[10] = {1., 0.961, 0.852, 0.698, 0.527, 0.368, 0.237, 0.141, 0.077, 0.039};
/* Synthesize a replacement for a lost frame.
   st:    decoder state; its excitation buffer, filter memories, random seed
          and pitch-gain history are read and updated.
   out:   receives st->frameSize output samples.
   stack: scratch pointer forwarded to iir_mem16().
   The new excitation repeats the past excitation at a jittered pitch lag,
   scaled by an attenuated pitch gain, with random noise added. Some lines
   (local declarations, else branches, updates after synthesis) are not
   visible in this view. */
1304 static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack)
1308 spx_word16_t pitch_gain;
1310 spx_word16_t gain_med;
1311 spx_word16_t innov_gain;
1312 spx_word16_t noise_gain;
1314 st->exc = st->excBuf + 2*st->max_pitch + st->subframeSize + 6;
/* Attenuation factor from the table for the first 10 consecutive losses; the
   value used beyond that is not visible in this view. */
1316 if (st->count_lost<10)
1317 fact = attenuation[st->count_lost];
/* Lower last_pitch_gain to the median of the three most recent pitch gains
   when the median is smaller. */
1321 gain_med = median3(st->pitch_gain_buf[0], st->pitch_gain_buf[1], st->pitch_gain_buf[2]);
1322 if (gain_med < st->last_pitch_gain)
1323 st->last_pitch_gain = gain_med;
/* Two alternative conversions of last_pitch_gain follow (shift-based and
   GAIN_SCALING_1-based); the directive selecting between them, and any
   clamping in between, is not visible in this view. */
1326 pitch_gain = st->last_pitch_gain;
1329 pitch_gain = SHL16(pitch_gain, 9);
1331 pitch_gain = GAIN_SCALING_1*st->last_pitch_gain;
1335 pitch_gain = MULT16_16_Q15(fact,pitch_gain) + VERY_SMALL;
1336 /* FIXME: This was rms of innovation (not exc) */
1337 innov_gain = compute_rms16(st->exc, st->frameSize);
/* noise_gain = innov_gain * fact * (1 - pitch_gain^2). */
1338 noise_gain = MULT16_16_Q15(innov_gain, MULT16_16_Q15(fact, SUB16(Q15ONE,MULT16_16_Q15(pitch_gain,pitch_gain))));
1339 /* Shift all buffers by one frame */
1340 SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, 2*st->max_pitch + st->subframeSize + 12);
/* Pitch lag: the last pitch plus a random offset whose amplitude argument
   grows with the number of lost frames, clamped to [min_pitch, max_pitch]. */
1343 pitch_val = st->last_pitch + SHR32((spx_int32_t)speex_rand(1+st->count_lost, &st->seed),SIG_SHIFT);
1344 if (pitch_val > st->max_pitch)
1345 pitch_val = st->max_pitch;
1346 if (pitch_val < st->min_pitch)
1347 pitch_val = st->min_pitch;
/* Each new sample is the sample pitch_val positions earlier scaled by
   pitch_gain, plus random noise generated with noise_gain. */
1348 for (i=0;i<st->frameSize;i++)
1350 st->exc[i]= MULT16_16_Q15(pitch_gain, (st->exc[i-pitch_val]+VERY_SMALL)) +
1351 speex_rand(noise_gain, &st->seed);
/* Apply bw_lpc() with factor .98 to the stored synthesis filter in place,
   then synthesize starting one sub-frame before st->exc and high-pass the
   output. */
1354 bw_lpc(QCONST16(.98,15), st->interp_qlpc, st->interp_qlpc, st->lpcSize);
1355 iir_mem16(&st->exc[-st->subframeSize], st->interp_qlpc, out, st->frameSize,
1356 st->lpcSize, st->mem_sp, stack);
1357 highpass(out, out, st->frameSize, HIGHPASS_NARROWBAND|HIGHPASS_OUTPUT, st->mem_hp);
/* Record the gain used (shifted back down by 9) in the three-entry circular
   history. */
1361 st->pitch_gain_buf[st->pitch_gain_buf_idx++] = PSHR16(pitch_gain,9);
1362 if (st->pitch_gain_buf_idx > 2) /* rollover */
1363 st->pitch_gain_buf_idx = 0;
1366 /* Just so we don't need to carry the complete wideband mode information */
/* Indexed by the wideband sub-mode ID that nb_decode() reads from the bit-stream.
   nb_decode() subtracts the SB_SUBMODE_BITS+1 bits it has already consumed from
   the entry and passes the remainder to speex_bits_advance() to skip the block.
   NOTE(review): entries are presumably the total bits per wideband frame for
   each sub-mode -- confirm against the wideband mode definitions. */
1367 static const int wb_skip_table[8] = {0, 36, 112, 192, 352, 0, 0, 0};
/* Decode one narrowband frame.
     state - decoder state, used as a DecState*
     bits  - packed input; when NULL the frame goes through the DTX check
             (if st->dtx_enabled) or is concealed with nb_decode_lost()
     vout  - output buffer, accessed as spx_word16_t for st->frameSize samples
   Visible processing order: optional frame-header parsing (wideband-layer skip,
   4-bit mode ID, in-band requests), LSP unquantisation, frame-level pitch/gain
   fields, per-subframe adaptive and fixed codebook decoding, optional
   enhancement, per-subframe synthesis filtering, optional high-pass, and the
   state updates used by the next frame and by nb_decode_lost().
   NOTE(review): the return statements are not among the lines visible here;
   check the public decoder documentation for the return-code convention. */
1369 int nb_decode(void *state, SpeexBits *bits, void *vout)
1374 spx_word16_t pitch_gain[3];
1375 spx_word32_t ol_gain=0;
1377 spx_word16_t ol_pitch_coef=0;
1379 spx_word16_t best_pitch_gain=0;
1383 VARDECL(spx_sig_t *innov);
1384 VARDECL(spx_word32_t *exc32);
1385 VARDECL(spx_coef_t *ak);
1386 VARDECL(spx_lsp_t *qlsp);
1387 spx_word16_t pitch_average=0;
1389 spx_word16_t *out = (spx_word16_t*)vout;
1390 VARDECL(spx_lsp_t *interp_qlsp);
1392 st=(DecState*)state;
/* Same excitation offset inside excBuf as used by nb_decode_lost() */
1395 st->exc = st->excBuf + 2*st->max_pitch + st->subframeSize + 6;
1397 /* Check if we're in DTX mode*/
1398 if (!bits && st->dtx_enabled)
1403 /* If bits is NULL, consider the packet to be lost (what could we do anyway) */
1406 nb_decode_lost(st, out, stack);
/* The frame-header parsing below is guarded by st->encode_submode */
1410 if (st->encode_submode)
1413 /* Search for next narrowband block (handle requests, skip wideband blocks) */
1415 if (speex_bits_remaining(bits)<5)
1417 wideband = speex_bits_unpack_unsigned(bits, 1);
1418 if (wideband) /* Skip wideband block (for compatibility) */
/* Look up the block length from its sub-mode ID, then skip what is left after
   the bits already read (1 wideband flag + SB_SUBMODE_BITS) */
1422 advance = submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS);
1423 /*speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance);*/
1424 advance = wb_skip_table[submode];
1427 speex_notify("Invalid mode encountered. The stream is corrupted.");
1430 advance -= (SB_SUBMODE_BITS+1);
1431 speex_bits_advance(bits, advance);
/* Same skip procedure for a possible second wideband layer */
1433 if (speex_bits_remaining(bits)<5)
1435 wideband = speex_bits_unpack_unsigned(bits, 1);
1438 advance = submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS);
1439 /*speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance);*/
1440 advance = wb_skip_table[submode];
1443 speex_notify("Invalid mode encountered. The stream is corrupted.");
1446 advance -= (SB_SUBMODE_BITS+1);
1447 speex_bits_advance(bits, advance);
1448 wideband = speex_bits_unpack_unsigned(bits, 1);
1451 speex_notify("More than two wideband layers found. The stream is corrupted.");
1457 if (speex_bits_remaining(bits)<4)
1459 /* FIXME: Check for overflow */
/* 4-bit mode ID: 15, 14 and 13 are control codes, values above 8 are rejected */
1460 m = speex_bits_unpack_unsigned(bits, 4);
1461 if (m==15) /* We found a terminator */
1464 } else if (m==14) /* Speex in-band request */
1466 int ret = speex_inband_handler(bits, st->speex_callbacks, state);
1469 } else if (m==13) /* User in-band request */
1471 int ret = st->user_callback.func(bits, state, st->user_callback.data);
1474 } else if (m>8) /* Invalid mode */
1476 speex_notify("Invalid mode encountered. The stream is corrupted.");
1482 /* Get the sub-mode that was used */
1488 /* Shift all buffers by one frame */
1489 SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, 2*st->max_pitch + st->subframeSize + 12);
1491 /* If null mode (no transmission), just set a couple things to zero*/
1492 if (st->submodes[st->submodeID] == NULL)
1494 VARDECL(spx_coef_t *lpc);
1495 ALLOC(lpc, st->lpcSize, spx_coef_t);
1496 bw_lpc(QCONST16(0.93f,15), st->interp_qlpc, lpc, st->lpcSize);
/* Refill the excitation with speex_rand() values generated from the RMS of the
   excitation currently in the buffer */
1498 spx_word16_t innov_gain=0;
1499 /* FIXME: This was innov, not exc */
1500 innov_gain = compute_rms16(st->exc, st->frameSize);
1501 for (i=0;i<st->frameSize;i++)
1502 st->exc[i]=speex_rand(innov_gain, &st->seed);
1508 /* Final signal synthesis from excitation */
1509 iir_mem16(st->exc, lpc, out, st->frameSize, st->lpcSize, st->mem_sp, stack);
1515 ALLOC(qlsp, st->lpcSize, spx_lsp_t);
1517 /* Unquantize LSPs */
1518 SUBMODE(lsp_unquant)(qlsp, st->lpcSize, bits);
1520 /*Damp memory if a frame was lost and the LSP changed too much*/
1524 spx_word32_t lsp_dist=0;
1525 for (i=0;i<st->lpcSize;i++)
1526 lsp_dist = ADD32(lsp_dist, EXTEND32(ABS(st->old_qlsp[i] - qlsp[i])));
/* Two alternative forms of the damping factor follow; both shrink as lsp_dist grows.
   NOTE(review): presumably fixed-point and floating-point variants -- confirm. */
1528 fact = SHR16(19661,SHR32(lsp_dist,LSP_SHIFT+2));
1530 fact = .6*exp(-.2*lsp_dist);
1532 for (i=0;i<st->lpcSize;i++)
1533 st->mem_sp[i] = MULT16_32_Q15(fact,st->mem_sp[i]);
1537 /* Handle first frame and lost-packet case */
1538 if (st->first || st->count_lost)
1540 for (i=0;i<st->lpcSize;i++)
1541 st->old_qlsp[i] = qlsp[i];
1544 /* Get open-loop pitch estimation for low bit-rate pitch coding */
1545 if (SUBMODE(lbr_pitch)!=-1)
1547 ol_pitch = st->min_pitch+speex_bits_unpack_unsigned(bits, 7);
1550 if (SUBMODE(forced_pitch_gain))
1553 quant = speex_bits_unpack_unsigned(bits, 4);
1554 ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT));
1557 /* Get global excitation gain */
1560 qe = speex_bits_unpack_unsigned(bits, 5);
1562 /* FIXME: Perhaps we could slightly lower the gain here when the output is going to saturate? */
1563 ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]);
1565 ol_gain = SIG_SCALING*exp(qe/3.5);
1569 ALLOC(ak, st->lpcSize, spx_coef_t);
1570 ALLOC(innov, st->subframeSize, spx_sig_t);
1571 ALLOC(exc32, st->subframeSize, spx_word32_t);
/* Sub-mode 1 carries 4 additional bits at this point */
1573 if (st->submodeID==1)
1576 extra = speex_bits_unpack_unsigned(bits, 4);
1583 if (st->submodeID>1)
1586 /*Loop on subframes */
1587 for (sub=0;sub<st->nbSubframes;sub++)
1592 spx_word16_t *innov_save = NULL;
1595 /* Offset relative to start of frame */
1596 offset = st->subframeSize*sub;
1599 /* Original signal */
1602 innov_save = st->innov_save+offset;
1605 /* Reset excitation */
1606 SPEEX_MEMSET(exc, 0, st->subframeSize);
1608 /*Adaptive codebook contribution*/
1609 speex_assert (SUBMODE(ltp_unquant));
1611 int pit_min, pit_max;
1612 /* Handle pitch constraints if any */
1613 if (SUBMODE(lbr_pitch) != -1)
1616 margin = SUBMODE(lbr_pitch);
1619 /* GT - need optimization?
1620 if (ol_pitch < st->min_pitch+margin-1)
1621 ol_pitch=st->min_pitch+margin-1;
1622 if (ol_pitch > st->max_pitch-margin)
1623 ol_pitch=st->max_pitch-margin;
1624 pit_min = ol_pitch-margin+1;
1625 pit_max = ol_pitch+margin;
1627 pit_min = ol_pitch-margin+1;
1628 if (pit_min < st->min_pitch)
1629 pit_min = st->min_pitch;
1630 pit_max = ol_pitch+margin;
1631 if (pit_max > st->max_pitch)
1632 pit_max = st->max_pitch;
1634 pit_min = pit_max = ol_pitch;
1637 pit_min = st->min_pitch;
1638 pit_max = st->max_pitch;
1643 SUBMODE(ltp_unquant)(exc, exc32, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params),
1644 st->subframeSize, &pitch, &pitch_gain[0], bits, stack,
1645 st->count_lost, offset, st->last_pitch_gain, 0);
1647 /* Ensuring that things aren't blowing up as would happen if e.g. an encoder is
1648 crafting packets to make us produce NaNs and slow down the decoder (vague DoS threat).
1649 We can probably be even more aggressive and limit to 15000 or so. */
1650 sanitize_values32(exc32, NEG32(QCONST32(32000,SIG_SHIFT-1)), QCONST32(32000,SIG_SHIFT-1), st->subframeSize);
/* Collapse the three tap gains into one value; it is summed into pitch_average
   and drives the best-pitch tracking below */
1652 tmp = gain_3tap_to_1tap(pitch_gain);
1654 pitch_average += tmp;
/* Three-way test on this subframe's lag:
   (1) its gain beats the best so far and the lag is not close to 2x, 3x or 4x the best lag;
   (2) the best lag is close to 2x, 3x or 4x this lag and the gain exceeds 0.6 of the best gain;
   (3) this lag is close to 2x, 3x or 4x the best lag and 0.67 of its gain still beats the best.
   NOTE(review): the statement executed when the test holds is presumably the
   best_pitch update consumed at the end of the function -- confirm. */
1655 if ((tmp>best_pitch_gain&&ABS(2*best_pitch-pitch)>=3&&ABS(3*best_pitch-pitch)>=4&&ABS(4*best_pitch-pitch)>=5)
1656 || (tmp>MULT16_16_Q15(QCONST16(.6,15),best_pitch_gain)&&(ABS(best_pitch-2*pitch)<3||ABS(best_pitch-3*pitch)<4||ABS(best_pitch-4*pitch)<5))
1657 || (MULT16_16_Q15(QCONST16(.67,15),tmp)>best_pitch_gain&&(ABS(2*best_pitch-pitch)<3||ABS(3*best_pitch-pitch)<4||ABS(4*best_pitch-pitch)<5)) )
1660 if (tmp > best_pitch_gain)
1661 best_pitch_gain = tmp;
1665 /* Unquantize the innovation */
1670 SPEEX_MEMSET(innov, 0, st->subframeSize);
1672 /* Decode sub-frame gain correction */
1673 if (SUBMODE(have_subframe_gain)==3)
1675 q_energy = speex_bits_unpack_unsigned(bits, 3);
1676 ener = MULT16_32_Q14(exc_gain_quant_scal3[q_energy],ol_gain);
1677 } else if (SUBMODE(have_subframe_gain)==1)
1679 q_energy = speex_bits_unpack_unsigned(bits, 1);
1680 ener = MULT16_32_Q14(exc_gain_quant_scal1[q_energy],ol_gain);
1685 speex_assert (SUBMODE(innovation_unquant));
1687 /*Fixed codebook contribution*/
1688 SUBMODE(innovation_unquant)(innov, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed);
1689 /* De-normalize innovation and update excitation */
1691 signal_mul(innov, innov, ener, st->subframeSize);
1693 /* Decode second codebook (only for some modes) */
1694 if (SUBMODE(double_codebook))
1696 char *tmp_stack=stack;
1697 VARDECL(spx_sig_t *innov2);
1698 ALLOC(innov2, st->subframeSize, spx_sig_t);
1699 SPEEX_MEMSET(innov2, 0, st->subframeSize);
1700 SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed);
1701 signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize);
1702 for (i=0;i<st->subframeSize;i++)
1703 innov[i] = ADD32(innov[i], innov2[i]);
/* Sum the adaptive part (exc32, shifted left by 1) and the innovation, shift
   down by SIG_SHIFT with rounding and saturate to +/-32767 */
1706 for (i=0;i<st->subframeSize;i++)
1707 exc[i]=EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
1708 /*print_vec(exc, 40, "innov");*/
/* Copy the innovation, shifted down by SIG_SHIFT, into this subframe's slot of st->innov_save */
1711 for (i=0;i<st->subframeSize;i++)
1712 innov_save[i] = EXTRACT16(PSHR32(innov[i], SIG_SHIFT));
/* Sub-mode 1: rebuild the excitation from pulses spaced ol_pitch samples apart
   (position carried across subframes in st->voc_offset), then mix it with the
   decoded innovation and remove a running mean */
1717 if (st->submodeID==1)
1719 spx_word16_t g=ol_pitch_coef;
1720 g=MULT16_16_P14(QCONST16(1.5f,14),(g-QCONST16(.2f,6)));
1726 SPEEX_MEMSET(exc, 0, st->subframeSize);
1727 while (st->voc_offset<st->subframeSize)
1729 /* exc[st->voc_offset]= g*sqrt(2*ol_pitch)*ol_gain;
1730 Not quite sure why we need the factor of two in the sqrt */
1731 if (st->voc_offset>=0)
1732 exc[st->voc_offset]=MULT16_16(spx_sqrt(MULT16_16_16(2,ol_pitch)),EXTRACT16(PSHR32(MULT16_16(g,PSHR32(ol_gain,SIG_SHIFT)),6)));
1733 st->voc_offset+=ol_pitch;
1735 st->voc_offset -= st->subframeSize;
1737 for (i=0;i<st->subframeSize;i++)
1739 spx_word16_t exci=exc[i];
1740 exc[i]= ADD16(ADD16(MULT16_16_Q15(QCONST16(.7f,15),exc[i]) , MULT16_16_Q15(QCONST16(.3f,15),st->voc_m1)),
1741 SUB16(MULT16_16_Q15(Q15_ONE-MULT16_16_16(QCONST16(.85f,9),g),EXTRACT16(PSHR32(innov[i],SIG_SHIFT))),
1742 MULT16_16_Q15(MULT16_16_16(QCONST16(.15f,9),g),EXTRACT16(PSHR32(st->voc_m2,SIG_SHIFT)))
1745 st->voc_m2=innov[i];
1746 st->voc_mean = EXTRACT16(PSHR32(ADD32(MULT16_16(QCONST16(.8f,15),st->voc_mean), MULT16_16(QCONST16(.2f,15),exc[i])), 15));
1747 exc[i]-=st->voc_mean;
1754 ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t);
/* Enhancement runs only with lpc_enh_enabled, a positive comb gain and no lost
   frames pending; multicomb() is applied to two windows of 2*subframeSize
   samples, each taken one subframe behind the matching output position */
1756 if (st->lpc_enh_enabled && SUBMODE(comb_gain)>0 && !st->count_lost)
1758 multicomb(st->exc-st->subframeSize, out, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack);
1759 multicomb(st->exc+st->subframeSize, out+2*st->subframeSize, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack);
1761 SPEEX_COPY(out, &st->exc[-st->subframeSize], st->frameSize);
1764 /* If the last packet was lost, re-scale the excitation to obtain the same energy as encoded in ol_gain */
1767 spx_word16_t exc_ener;
1768 spx_word32_t gain32;
1770 exc_ener = compute_rms16 (st->exc, st->frameSize);
1771 gain32 = PDIV32(ol_gain, ADD16(exc_ener,1));
1775 gain = EXTRACT16(gain32);
1781 for (i=0;i<st->frameSize;i++)
1783 st->exc[i] = MULT16_16_Q14(gain, st->exc[i]);
1784 out[i]=st->exc[i-st->subframeSize];
1788 /*Loop on subframes */
1789 for (sub=0;sub<st->nbSubframes;sub++)
1794 /* Offset relative to start of frame */
1795 offset = st->subframeSize*sub;
1796 /* Original signal */
1801 /* LSP interpolation (quantized and unquantized) */
1802 lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes);
1804 /* Make sure the LSP's are stable */
1805 lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN);
1807 /* Compute interpolated LPCs (unquantized) */
1808 lsp_to_lpc(interp_qlsp, ak, st->lpcSize, stack);
1810 /* Compute analysis filter at w=pi */
1812 spx_word32_t pi_g=LPC_SCALING;
1813 for (i=0;i<st->lpcSize;i+=2)
1815 /*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/
1816 pi_g = ADD32(pi_g, SUB32(EXTEND32(ak[i+1]),EXTEND32(ak[i])));
1818 st->pi_gain[sub] = pi_g;
/* Filter sp in place with the coefficients still held in interp_qlpc from the
   previous pass; interp_qlpc is then refreshed from ak */
1821 iir_mem16(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
1824 for (i=0;i<st->lpcSize;i++)
1825 st->interp_qlpc[i] = ak[i];
1829 if (st->highpass_enabled)
1830 highpass(out, out, st->frameSize, (st->isWideband?HIGHPASS_WIDEBAND:HIGHPASS_NARROWBAND)|HIGHPASS_OUTPUT, st->mem_hp);
1831 /*for (i=0;i<st->frameSize;i++)
1832 printf ("%d\n", (int)st->frame[i]);*/
1834 /* Tracking output level */
/* level follows ol_gain; max_level decays by .99 and min_level grows by 1.01 (+1)
   per frame, each bounded by the current level, and max_level is kept at least
   min_level+1 */
1835 st->level = 1+PSHR32(ol_gain,SIG_SHIFT);
1836 st->max_level = MAX16(MULT16_16_Q15(QCONST16(.99f,15), st->max_level), st->level);
1837 st->min_level = MIN16(ADD16(1,MULT16_16_Q14(QCONST16(1.01f,14), st->min_level)), st->level);
1838 if (st->max_level < st->min_level+1)
1839 st->max_level = st->min_level+1;
1840 /*printf ("%f %f %f %d\n", og, st->min_level, st->max_level, update);*/
1842 /* Store the LSPs for interpolation in the next frame */
1843 for (i=0;i<st->lpcSize;i++)
1844 st->old_qlsp[i] = qlsp[i];
1846 /* The next frame will not be the first (Duh!) */
1849 st->last_pitch = best_pitch;
/* A quarter of the summed per-subframe gains, in two alternative forms
   (PSHR16 by 2 versus multiplication by .25) */
1851 st->last_pitch_gain = PSHR16(pitch_average,2);
1853 st->last_pitch_gain = .25*pitch_average;
/* Push the gain into the 3-entry circular history read by nb_decode_lost() */
1855 st->pitch_gain_buf[st->pitch_gain_buf_idx++] = st->last_pitch_gain;
1856 if (st->pitch_gain_buf_idx > 2) /* rollover */
1857 st->pitch_gain_buf_idx = 0;
1859 st->last_ol_gain = ol_gain;
1863 #endif /* DISABLE_DECODER */