1 /* Copyright (C) 2002 Jean-Marc Valin
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 #include "quant_lsp.h"
42 #include "cb_search.h"
44 #include "stack_alloc.h"
46 #include <speex/speex_bits.h>
49 #include <speex/speex_callbacks.h>
/* Value of pi used by the window, lag-window and LSP initialisation code in this file. */
54 #define M_PI 3.14159265358979323846 /* pi */
/* Reads field x of the currently selected sub-mode. Needs a local `st` in scope and
   dereferences st->submodes[st->submodeID], so that entry must not be NULL. */
61 #define SUBMODE(x) st->submodes[st->submodeID]->x
/* NOTE(review): the gain tables and LSP_DELTA2 below each appear twice (integer and float
   flavours). Presumably a preprocessor conditional that is not visible in this view selects
   one set - confirm against the full file. */
/* 32-entry open-loop gain table; indexed by the 5-bit value produced by scal_quant32(). */
65 const spx_word32_t ol_gain_table[32]={18900, 25150, 33468, 44536, 59265, 78865, 104946, 139653, 185838, 247297, 329081, 437913, 582736, 775454, 1031906, 1373169, 1827293, 2431601, 3235761, 4305867, 5729870, 7624808, 10146425, 13501971, 17967238, 23909222, 31816294, 42338330, 56340132, 74972501, 99766822, 132760927};
/* Sub-frame gain quantiser, 3-bit variant: 7 decision bounds and 8 reconstruction levels. */
66 const spx_word16_t exc_gain_quant_scal3_bound[7]={1841, 3883, 6051, 8062, 10444, 13580, 18560};
67 const spx_word16_t exc_gain_quant_scal3[8]={1002, 2680, 5086, 7016, 9108, 11781, 15380, 21740};
/* Sub-frame gain quantiser, 1-bit variant: 1 decision bound and 2 reconstruction levels. */
68 const spx_word16_t exc_gain_quant_scal1_bound[1]={14385};
69 const spx_word16_t exc_gain_quant_scal1[2]={11546, 17224};
/* Step sizes passed to lpc_to_lsp() for the first and second root search respectively. */
72 #define LSP_DELTA1 6553
73 #define LSP_DELTA2 1638
/* Floating-point counterparts of the sub-frame gain quantiser tables above. */
77 const float exc_gain_quant_scal3_bound[7]={0.112338, 0.236980, 0.369316, 0.492054, 0.637471, 0.828874, 1.132784};
78 const float exc_gain_quant_scal3[8]={0.061130, 0.163546, 0.310413, 0.428220, 0.555887, 0.719055, 0.938694, 1.326874};
79 const float exc_gain_quant_scal1_bound[1]={0.87798};
80 const float exc_gain_quant_scal1[2]={0.70469, 1.05127};
/* Margin handed to lsp_enforce_margin() when interpolated LSPs are stabilised. */
82 #define LSP_MARGIN .002
84 #define LSP_DELTA2 .05
/* Square of x. The argument is evaluated twice, so it must be free of side effects. */
91 #define sqr(x) ((x)*(x))
/* Create and initialise a narrowband encoder state for mode `m`.
 *
 * The state structure and its scratch area are obtained with a single speex_alloc() call;
 * every buffer set up below is obtained through PUSH/PUSHS on st->stack, which starts right
 * after the EncState structure.
 *
 * NOTE(review): local declarations, the allocation-failure check and the return statement
 * are not visible in this view - confirm them against the full file.
 */
93 void *nb_encoder_init(const SpeexMode *m)
96 const SpeexNBMode *mode;
99 mode=(SpeexNBMode *)m->mode;
/* One allocation: the state itself plus room for 8000 spx_sig_t of scratch space. */
100 st = (EncState*)speex_alloc(sizeof(EncState)+8000*sizeof(spx_sig_t));
104 st->stack = ((char*)st) + sizeof(EncState);
/* Copy the codec parameters out of the mode description. */
108 st->frameSize = mode->frameSize;
/* The analysis window spans one and a half frames. */
109 st->windowSize = st->frameSize*3/2;
110 st->nbSubframes=mode->frameSize/mode->subframeSize;
111 st->subframeSize=mode->subframeSize;
112 st->lpcSize = mode->lpcSize;
113 st->bufSize = mode->bufSize;
114 st->gamma1=mode->gamma1;
115 st->gamma2=mode->gamma2;
116 st->min_pitch=mode->pitchStart;
117 st->max_pitch=mode->pitchEnd;
118 st->lag_factor=mode->lag_factor;
119 st->lpc_floor = mode->lpc_floor;
121 st->submodes=mode->submodes;
122 st->submodeID=st->submodeSelect=mode->defaultSubmode;
/* Start with the pitch search restricted to the current frame (see the bounded_pitch test in nb_encode). */
123 st->bounded_pitch = 1;
125 st->encode_submode = 1;
127 st->lbr_48k=mode->lbr48k;
130 /* Allocating input buffer */
131 st->inBuf = PUSH(st->stack, st->bufSize, spx_sig_t);
/* Each working pointer (frame, exc, sw, exc2) addresses the last windowSize samples of its buffer. */
132 st->frame = st->inBuf + st->bufSize - st->windowSize;
133 /* Allocating excitation buffer */
134 st->excBuf = PUSH(st->stack, st->bufSize, spx_sig_t);
135 st->exc = st->excBuf + st->bufSize - st->windowSize;
136 st->swBuf = PUSH(st->stack, st->bufSize, spx_sig_t);
137 st->sw = st->swBuf + st->bufSize - st->windowSize;
139 st->exc2Buf = PUSH(st->stack, st->bufSize, spx_sig_t);
140 st->exc2 = st->exc2Buf + st->bufSize - st->windowSize;
142 st->innov = PUSH(st->stack, st->frameSize, spx_sig_t);
144 /* Asymmetric "pseudo-Hamming" window */
/* part1 is the rising section and part2 the falling one; part1+part2 equals windowSize. */
147 part1=st->frameSize - (st->subframeSize>>1);
148 part2=(st->frameSize>>1) + (st->subframeSize>>1);
149 st->window = PUSH(st->stack, st->windowSize, spx_word16_t);
150 for (i=0;i<part1;i++)
151 st->window[i]=(spx_word16_t)(SIG_SCALING*(.54-.46*cos(M_PI*i/part1)));
152 for (i=0;i<part2;i++)
153 st->window[part1+i]=(spx_word16_t)(SIG_SCALING*(.54+.46*cos(M_PI*i/part2)));
155 /* Create the window for autocorrelation (lag-windowing) */
156 st->lagWindow = PUSH(st->stack, st->lpcSize+1, spx_word16_t);
/* Gaussian taper scaled by 16384; nb_encode applies it with MULT16_16_Q14. */
157 for (i=0;i<st->lpcSize+1;i++)
158 st->lagWindow[i]=16384*exp(-.5*sqr(2*M_PI*st->lag_factor*i));
160 st->autocorr = PUSH(st->stack, st->lpcSize+1, spx_word16_t);
162 st->buf2 = PUSH(st->stack, st->windowSize, spx_sig_t);
/* LPC coefficient arrays hold lpcSize+1 entries; LSP arrays hold lpcSize entries. */
164 st->lpc = PUSH(st->stack, st->lpcSize+1, spx_coef_t);
165 st->interp_lpc = PUSH(st->stack, st->lpcSize+1, spx_coef_t);
166 st->interp_qlpc = PUSH(st->stack, st->lpcSize+1, spx_coef_t);
167 st->bw_lpc1 = PUSH(st->stack, st->lpcSize+1, spx_coef_t);
168 st->bw_lpc2 = PUSH(st->stack, st->lpcSize+1, spx_coef_t);
170 st->lsp = PUSH(st->stack, st->lpcSize, spx_lsp_t);
171 st->qlsp = PUSH(st->stack, st->lpcSize, spx_lsp_t);
172 st->old_lsp = PUSH(st->stack, st->lpcSize, spx_lsp_t);
173 st->old_qlsp = PUSH(st->stack, st->lpcSize, spx_lsp_t);
174 st->interp_lsp = PUSH(st->stack, st->lpcSize, spx_lsp_t);
175 st->interp_qlsp = PUSH(st->stack, st->lpcSize, spx_lsp_t);
/* Seed the LSPs with values evenly spaced over (0, pi), scaled by LSP_SCALING. */
178 for (i=0;i<st->lpcSize;i++)
180 st->lsp[i]=LSP_SCALING*(M_PI*((float)(i+1)))/(st->lpcSize+1);
/* Filter memories, one entry per LPC coefficient. */
183 st->mem_sp = PUSH(st->stack, st->lpcSize, spx_mem_t);
184 st->mem_sw = PUSH(st->stack, st->lpcSize, spx_mem_t);
185 st->mem_sw_whole = PUSH(st->stack, st->lpcSize, spx_mem_t);
186 st->mem_exc = PUSH(st->stack, st->lpcSize, spx_mem_t);
/* Per-sub-frame outputs: filter gain at w=pi and the selected pitch. */
188 st->pi_gain = PUSH(st->stack, st->nbSubframes, spx_word32_t);
190 st->pitch = PUSH(st->stack, st->nbSubframes, int);
192 st->vbr = PUSHS(st->stack, VBRState);
202 st->sampling_rate=8000;
/* When built with ENABLE_VALGRIND, mark the region from the state start up to the current stack pointer as readable. */
205 #ifdef ENABLE_VALGRIND
206 VALGRIND_MAKE_READABLE(st, (st->stack-(char*)st));
/* Tear down an encoder state: the visible code shuts down the VBR analyser with vbr_destroy().
 * NOTE(review): the statement that releases `st` itself is not visible in this view - confirm
 * it against the full file.
 */
211 void nb_encoder_destroy(void *state)
213 EncState *st=(EncState *)state;
214 /* Free all allocated memory */
216 vbr_destroy(st->vbr);
218 /*Free state memory... should be last*/
/* Encode one narrowband frame.
 *
 * state: encoder state (cast to EncState).
 * vin:   input samples, read as st->frameSize values of spx_word16_t.
 * bits:  bit packer that receives the frame parameters through speex_bits_pack().
 *
 * Visible stages, in order: shift the history buffers and append the new frame; windowed
 * autocorrelation, Levinson-Durbin and LPC-to-LSP conversion; open-loop pitch and gain
 * estimation; VBR/VAD/ABR sub-mode selection; LSP quantisation and header packing; then a
 * loop over sub-frames doing the long-term (pitch) search and the innovation codebook
 * search, followed by synthesis. At the end the input frame in st->frame is replaced by
 * the synthesized speech.
 *
 * NOTE(review): many declarations, braces and conditional-compilation lines are not visible
 * in this view, and the return value is not visible either - confirm against the full file.
 */
222 int nb_encode(void *state, void *vin, SpeexBits *bits)
227 spx_word16_t ol_pitch_coef;
228 spx_word32_t ol_gain;
229 spx_sig_t *res, *target;
238 spx_word16_t *in = vin;
240 st=(EncState *)state;
243 /* Copy new data in input buffer */
244 speex_move(st->inBuf, st->inBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(spx_sig_t));
245 for (i=0;i<st->frameSize;i++)
246 st->inBuf[st->bufSize-st->frameSize+i] = SHL((int)in[i], SIG_SHIFT);
248 /* Move signals 1 frame towards the past */
249 speex_move(st->exc2Buf, st->exc2Buf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(spx_sig_t));
250 speex_move(st->excBuf, st->excBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(spx_sig_t));
251 speex_move(st->swBuf, st->swBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(spx_sig_t));
256 w_sig = PUSH(stack, st->windowSize, spx_word16_t);
257 /* Window for analysis */
258 for (i=0;i<st->windowSize;i++)
259 w_sig[i] = SHR(MULT16_16(SHR((spx_word32_t)(st->frame[i]),SIG_SHIFT),st->window[i]),SIG_SHIFT);
261 /* Compute auto-correlation */
262 _spx_autocorr(w_sig, st->autocorr, st->lpcSize+1, st->windowSize);
264 st->autocorr[0] = (spx_word16_t) (st->autocorr[0]*st->lpc_floor); /* Noise floor in auto-correlation domain */
266 /* Lag windowing: equivalent to filtering in the power-spectrum domain */
267 for (i=0;i<st->lpcSize+1;i++)
268 st->autocorr[i] = MULT16_16_Q14(st->autocorr[i],st->lagWindow[i]);
270 /* Levinson-Durbin */
271 _spx_lpc(st->lpc+1, st->autocorr, st->lpcSize);
272 st->lpc[0]=(spx_coef_t)LPC_SCALING;
274 /* LPC to LSPs (x-domain) transform */
275 roots=lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 15, LSP_DELTA1, stack);
276 /* Check if we found all the roots */
277 if (roots!=st->lpcSize)
279 /* Search again if we can afford it */
280 if (st->complexity>1)
281 roots = lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 11, LSP_DELTA2, stack);
282 if (roots!=st->lpcSize)
284 /*If we can't find all LSP's, do some damage control and use previous filter*/
285 for (i=0;i<st->lpcSize;i++)
287 st->lsp[i]=st->old_lsp[i];
294 /* Whole frame analysis (open-loop estimation of pitch and excitation gain) */
297 for (i=0;i<st->lpcSize;i++)
298 st->interp_lsp[i] = st->lsp[i];
300 lsp_interpolate(st->old_lsp, st->lsp, st->interp_lsp, st->lpcSize, st->nbSubframes, st->nbSubframes<<1);
302 lsp_enforce_margin(st->interp_lsp, st->lpcSize, LSP_MARGIN);
304 /* Compute interpolated LPCs (unquantized) for whole frame*/
305 lsp_to_lpc(st->interp_lsp, st->interp_lpc, st->lpcSize,stack);
/* Open-loop pitch analysis runs only when something uses it: the null sub-mode, VBR, VAD,
   a forced pitch gain, or low-bit-rate pitch coding. The NULL test comes first so that the
   SUBMODE() dereferences are skipped for the null sub-mode. */
309 if (!st->submodes[st->submodeID] || st->vbr_enabled || st->vad_enabled || SUBMODE(forced_pitch_gain) ||
310 SUBMODE(lbr_pitch) != -1)
313 spx_word16_t nol_pitch_coef[6];
315 bw_lpc(st->gamma1, st->interp_lpc, st->bw_lpc1, st->lpcSize);
316 bw_lpc(st->gamma2, st->interp_lpc, st->bw_lpc2, st->lpcSize);
318 filter_mem2(st->frame, st->bw_lpc1, st->bw_lpc2, st->sw, st->frameSize, st->lpcSize, st->mem_sw_whole);
/* Ask for the six best pitch candidates over the whole frame of weighted signal. */
320 open_loop_nbest_pitch(st->sw, st->min_pitch, st->max_pitch, st->frameSize,
321 nol_pitch, nol_pitch_coef, 6, stack);
322 ol_pitch=nol_pitch[0];
323 ol_pitch_coef = nol_pitch_coef[0];
324 /*Try to remove pitch multiples*/
/* NOTE(review): the next two tests look like integer and float forms of the same 0.85
   threshold (27853/32768 is about 0.85); presumably only one is compiled - confirm. */
328 if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],27853)) &&
330 if ((nol_pitch_coef[i]>.85*nol_pitch_coef[0]) &&
332 (ABS(2*nol_pitch[i]-ol_pitch)<=2 || ABS(3*nol_pitch[i]-ol_pitch)<=3 ||
333 ABS(4*nol_pitch[i]-ol_pitch)<=4 || ABS(5*nol_pitch[i]-ol_pitch)<=5))
335 /*ol_pitch_coef=nol_pitch_coef[i];*/
336 ol_pitch = nol_pitch[i];
341 /*ol_pitch_coef = sqrt(ol_pitch_coef);*/
/* Keep ol_pitch two lags away from both limits so the ol_pitch-2..ol_pitch+2 search below
   stays inside [min_pitch, max_pitch]; then refine the pitch separately for each half frame. */
346 if (ol_pitch < st->min_pitch+2)
347 ol_pitch = st->min_pitch+2;
348 if (ol_pitch > st->max_pitch-2)
349 ol_pitch = st->max_pitch-2;
350 open_loop_nbest_pitch(st->sw, ol_pitch-2, ol_pitch+2, st->frameSize>>1,
351 &pitch_half[0], nol_pitch_coef, 1, stack);
352 open_loop_nbest_pitch(st->sw+(st->frameSize>>1), pitch_half[0]-1, pitch_half[0]+2, st->frameSize>>1,
353 &pitch_half[1], nol_pitch_coef, 1, stack);
360 /*Compute "real" excitation*/
361 fir_mem2(st->frame, st->interp_lpc, st->exc, st->frameSize, st->lpcSize, st->mem_exc);
363 /* Compute open-loop excitation gain */
369 ol1 = compute_rms(st->exc, st->frameSize>>1);
370 ol2 = compute_rms(st->exc+(st->frameSize>>1), st->frameSize>>1);
371 ol1 *= ol1*(st->frameSize>>1);
372 ol2 *= ol2*(st->frameSize>>1);
377 ol_gain2 = sqrt(2*ol_gain2*(ol1+ol2))*1.3*(1-.5*GAIN_SCALING_1*GAIN_SCALING_1*ol_pitch_coef*ol_pitch_coef);
379 ol_gain=SHR(sqrt(1+ol_gain2/st->frameSize),SIG_SHIFT);
383 ol_gain = SHL((spx_word32_t)compute_rms(st->exc, st->frameSize),SIG_SHIFT);
/* VBR / VAD handling: measure how far the LSPs moved since the previous frame, adapt the
   quality target when ABR is active, and use vbr_analysis() to obtain the relative quality
   that drives sub-mode selection. */
390 if (st->vbr && (st->vbr_enabled||st->vad_enabled))
393 for (i=0;i<st->lpcSize;i++)
394 lsp_dist += (st->old_lsp[i] - st->lsp[i])*(st->old_lsp[i] - st->lsp[i]);
395 lsp_dist /= LSP_SCALING*LSP_SCALING;
400 if (st->abr_drift2 * st->abr_drift > 0)
402 /* Only adapt if long-term and short-term drift are the same sign */
403 qual_change = -.00001*st->abr_drift/(1+st->abr_count);
406 if (qual_change<-.05)
409 st->vbr_quality += qual_change;
410 if (st->vbr_quality>10)
412 if (st->vbr_quality<0)
416 st->relative_quality = vbr_analysis(st->vbr, in, st->frameSize, ol_pitch, GAIN_SCALING_1*ol_pitch_coef);
417 /*if (delta_qual<0)*/
418 /* delta_qual*=.1*(3+st->vbr_quality);*/
/* The threshold for each candidate mode is linearly interpolated between the table rows
   for floor(vbr_quality) and the next integer quality. */
429 v1=(int)floor(st->vbr_quality);
431 thresh = vbr_nb_thresh[mode][v1];
433 thresh = (st->vbr_quality-v1)*vbr_nb_thresh[mode][v1+1] + (1+v1-st->vbr_quality)*vbr_nb_thresh[mode][v1];
434 if (st->relative_quality > thresh &&
435 st->relative_quality-thresh<min_diff)
438 min_diff = st->relative_quality-thresh;
445 if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
457 speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);
/* ABR bookkeeping: abr_drift accumulates the bitrate error against the target held in
   abr_enabled, abr_drift2 is a smoothed (0.95/0.05) version of the same error. */
462 speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate);
463 st->abr_drift+=(bitrate-st->abr_enabled);
464 st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled);
465 st->abr_count += 1.0;
471 if (st->relative_quality<2)
473 if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
483 mode=st->submodeSelect;
485 /*speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);*/
489 st->relative_quality = -1;
492 if (st->encode_submode)
498 /* First, transmit a zero for narrowband */
499 speex_bits_pack(bits, 0, 1);
501 /* Transmit the sub-mode we use for this frame */
502 speex_bits_pack(bits, st->submodeID, NB_SUBMODE_BITS);
509 /* If null mode (no transmission), just set a couple things to zero*/
510 if (st->submodes[st->submodeID] == NULL)
512 for (i=0;i<st->frameSize;i++)
513 st->exc[i]=st->exc2[i]=st->sw[i]=VERY_SMALL;
515 for (i=0;i<st->lpcSize;i++)
518 st->bounded_pitch = 1;
520 /* Final signal synthesis from excitation */
521 iir_mem2(st->exc, st->interp_qlpc, st->frame, st->frameSize, st->lpcSize, st->mem_sp);
523 for (i=0;i<st->frameSize;i++)
530 /* LSP Quantization */
533 for (i=0;i<st->lpcSize;i++)
534 st->old_lsp[i] = st->lsp[i];
539 #if 1 /*0 for unquantized*/
540 SUBMODE(lsp_quant)(st->lsp, st->qlsp, st->lpcSize, bits);
542 for (i=0;i<st->lpcSize;i++)
543 st->qlsp[i]=st->lsp[i];
/* Half-frame pitch coding: 7 bits for the first half relative to min_pitch, 2 bits for the
   offset of the second half (stored with a +1 bias). */
548 speex_bits_pack(bits, pitch_half[0]-st->min_pitch, 7);
549 speex_bits_pack(bits, pitch_half[1]-pitch_half[0]+1, 2);
552 int quant = (int)floor(.5+7.4*GAIN_SCALING_1*ol_pitch_coef);
558 speex_bits_pack(bits, quant, 3);
/* Replace the coefficient by its quantised value (the decoder rebuilds it with the same 0.13514 factor). */
559 ol_pitch_coef=GAIN_SCALING*0.13514*quant;
563 int qe = (int)(floor(.5+2.1*log(ol_gain*1.0/SIG_SCALING)))-2;
568 ol_gain = exp((qe+2)/2.1)*SIG_SCALING;
569 speex_bits_pack(bits, qe, 4);
575 /*If we use low bit-rate pitch mode, transmit open-loop pitch*/
576 if (SUBMODE(lbr_pitch)!=-1)
578 speex_bits_pack(bits, ol_pitch-st->min_pitch, 7);
581 if (SUBMODE(forced_pitch_gain))
584 quant = (int)floor(.5+15*ol_pitch_coef*GAIN_SCALING_1);
589 speex_bits_pack(bits, quant, 4);
590 ol_pitch_coef=GAIN_SCALING*0.066667*quant;
594 /*Quantize and transmit open-loop excitation gain*/
/* NOTE(review): a table-based form and a log/exp form of the 5-bit gain quantiser follow;
   presumably they are alternatives chosen by a conditional that is not visible - confirm. */
597 int qe = scal_quant32(ol_gain, ol_gain_table, 32);
598 /*ol_gain = exp(qe/3.5)*SIG_SCALING;*/
599 ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]);
600 speex_bits_pack(bits, qe, 5);
604 int qe = (int)(floor(.5+3.5*log(ol_gain*1.0/SIG_SCALING)));
609 ol_gain = exp(qe/3.5)*SIG_SCALING;
610 speex_bits_pack(bits, qe, 5);
620 /* Special case for first frame */
623 for (i=0;i<st->lpcSize;i++)
624 st->old_qlsp[i] = st->qlsp[i];
627 /* Filter response */
/* Per-sub-frame work buffers, taken from the local stack once before the loop. */
628 res = PUSH(stack, st->subframeSize, spx_sig_t);
630 target = PUSH(stack, st->subframeSize, spx_sig_t);
631 syn_resp = PUSH(stack, st->subframeSize, spx_sig_t);
632 real_exc = PUSH(stack, st->subframeSize, spx_sig_t);
633 mem = PUSH(stack, st->lpcSize, spx_mem_t);
635 /* Loop on sub-frames */
636 for (sub=0;sub<st->nbSubframes;sub++)
639 spx_sig_t *sp, *sw, *exc, *exc2;
641 int response_bound = st->subframeSize;
/* Sub-frames in the first half of the frame use pitch_half[0], the others pitch_half[1]. */
645 if (sub*2 < st->nbSubframes)
646 ol_pitch = pitch_half[0];
648 ol_pitch = pitch_half[1];
652 /* Offset relative to start of frame */
653 offset = st->subframeSize*sub;
654 /* Original signal */
658 /* Weighted signal */
661 exc2=st->exc2+offset;
664 /* LSP interpolation (quantized and unquantized) */
665 lsp_interpolate(st->old_lsp, st->lsp, st->interp_lsp, st->lpcSize, sub, st->nbSubframes);
666 lsp_interpolate(st->old_qlsp, st->qlsp, st->interp_qlsp, st->lpcSize, sub, st->nbSubframes);
668 /* Make sure the filters are stable */
669 lsp_enforce_margin(st->interp_lsp, st->lpcSize, LSP_MARGIN);
670 lsp_enforce_margin(st->interp_qlsp, st->lpcSize, LSP_MARGIN);
672 /* Compute interpolated LPCs (quantized and unquantized) */
673 lsp_to_lpc(st->interp_lsp, st->interp_lpc, st->lpcSize,stack);
675 lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, stack);
677 /* Compute analysis filter gain at w=pi (for use in SB-CELP) */
/* Alternating-sign sum of the coefficients; the loop reads index i+1, so it relies on the
   lpcSize+1 entries of interp_qlpc and, presumably, on lpcSize being even. */
679 spx_word32_t pi_g=st->interp_qlpc[0];
680 for (i=1;i<=st->lpcSize;i+=2)
682 pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];
684 st->pi_gain[sub] = pi_g;
688 /* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */
689 bw_lpc(st->gamma1, st->interp_lpc, st->bw_lpc1, st->lpcSize);
691 bw_lpc(st->gamma2, st->interp_lpc, st->bw_lpc2, st->lpcSize);
695 for (i=1;i<=st->lpcSize;i++)
699 for (i=0;i<st->subframeSize;i++)
700 real_exc[i] = exc[i];
702 /* Compute impulse response of A(z/g1) / ( A(z)*A(z/g2) )*/
703 for (i=0;i<st->subframeSize;i++)
/* At complexity 0 only the first half of the impulse response is computed. */
707 if (st->complexity==0)
708 response_bound >>= 1;
709 syn_percep_zero(exc, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, syn_resp, response_bound, st->lpcSize, stack);
710 for (i=response_bound;i<st->subframeSize;i++)
713 /* Reset excitation */
714 for (i=0;i<st->subframeSize;i++)
716 for (i=0;i<st->subframeSize;i++)
719 /* Compute zero response of A(z/g1) / ( A(z/g2) * A(z) ) */
/* The filters below run on copies of the memories (mem) so that st->mem_sp and st->mem_sw
   are not advanced by these trial passes. */
720 for (i=0;i<st->lpcSize;i++)
721 mem[i]=st->mem_sp[i];
722 iir_mem2(exc, st->interp_qlpc, exc, st->subframeSize, st->lpcSize, mem);
723 for (i=0;i<st->lpcSize;i++)
724 mem[i]=st->mem_sw[i];
725 filter_mem2(exc, st->bw_lpc1, st->bw_lpc2, res, st->subframeSize, st->lpcSize, mem);
727 /* Compute weighted signal */
728 for (i=0;i<st->lpcSize;i++)
729 mem[i]=st->mem_sw[i];
730 filter_mem2(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, mem);
/* At complexity 0 the weighting memory is committed here; otherwise it is updated after
   synthesis, when the weighted signal is recomputed from the synthesized speech. */
732 if (st->complexity==0)
733 for (i=0;i<st->lpcSize;i++)
734 st->mem_sw[i]=mem[i];
736 /* Compute target signal */
737 for (i=0;i<st->subframeSize;i++)
738 target[i]=sw[i]-res[i];
740 for (i=0;i<st->subframeSize;i++)
743 /* If we have a long-term predictor (otherwise, something's wrong) */
744 if (SUBMODE(ltp_quant))
746 int pit_min, pit_max;
747 /* Long-term prediction */
748 if (SUBMODE(lbr_pitch) != -1)
750 /* Low bit-rate pitch handling */
752 margin = SUBMODE(lbr_pitch);
/* Clamp ol_pitch so that the window [ol_pitch-margin+1, ol_pitch+margin] stays within
   [min_pitch, max_pitch]. */
755 if (ol_pitch < st->min_pitch+margin-1)
756 ol_pitch=st->min_pitch+margin-1;
757 if (ol_pitch > st->max_pitch-margin)
758 ol_pitch=st->max_pitch-margin;
759 pit_min = ol_pitch-margin+1;
760 pit_max = ol_pitch+margin;
762 pit_min=pit_max=ol_pitch;
765 pit_min = st->min_pitch;
766 pit_max = st->max_pitch;
769 /* Force pitch to use only the current frame if needed */
770 if (st->bounded_pitch && pit_max>offset)
776 pitch = SUBMODE(ltp_quant)(target, sw, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
777 exc, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
778 st->lpcSize, st->subframeSize, bits, stack,
779 exc2, syn_resp, st->complexity, ol_pitch_id);
783 /* Perform pitch search */
784 pitch = SUBMODE(ltp_quant)(target, sw, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
785 exc, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
786 st->lpcSize, st->subframeSize, bits, stack,
787 exc2, syn_resp, st->complexity, 0);
792 st->pitch[sub]=pitch;
794 speex_error ("No pitch prediction, what's wrong");
797 /* Update target for adaptive codebook contribution */
798 /* FIXME: We shouldn't have to apply the filter again (compute directly in the pitch quantizer) */
799 /*syn_percep_zero(exc, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, res, st->subframeSize, st->lpcSize, stack);
800 for (i=0;i<st->subframeSize;i++)
801 target[i]=SATURATE(SUB32(target[i],res[i]),805306368);
803 /* Quantization of innovation */
807 spx_word16_t fine_gain;
809 innov = st->innov+sub*st->subframeSize;
810 for (i=0;i<st->subframeSize;i++)
813 /*FIXME: Check that I'm really allowed to replace the residue_percep_zero */
/* real_exc becomes the part of the true excitation not explained by the pitch
   contribution; its RMS is the energy the innovation has to supply. */
814 for (i=0;i<st->subframeSize;i++)
815 real_exc[i] = SUB32(real_exc[i], exc[i]);
816 /*residue_percep_zero(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, st->buf2, st->subframeSize, st->lpcSize, stack);*/
818 ener = SHL((spx_word32_t)compute_rms(real_exc, st->subframeSize),SIG_SHIFT);
820 /*for (i=0;i<st->subframeSize;i++)
821 printf ("%f\n", st->buf2[i]/ener);
824 /*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */
827 spx_word32_t f = DIV32(ener,PSHR(ol_gain,SIG_SHIFT));
834 fine_gain = DIV32_16(ener,PSHR(ol_gain,SIG_SHIFT));
836 /* Calculate gain correction for the sub-frame (if any) */
837 if (SUBMODE(have_subframe_gain))
840 if (SUBMODE(have_subframe_gain)==3)
/* 3-bit sub-frame gain: quantise fine_gain, then rebuild ener from the table level and ol_gain. */
842 qe = scal_quant(fine_gain, exc_gain_quant_scal3_bound, 8);
843 speex_bits_pack(bits, qe, 3);
844 ener=MULT16_32_Q14(exc_gain_quant_scal3[qe],ol_gain);
/* 1-bit sub-frame gain, same scheme with the two-level table. */
846 qe = scal_quant(fine_gain, exc_gain_quant_scal1_bound, 2);
847 speex_bits_pack(bits, qe, 1);
848 ener=MULT16_32_Q14(exc_gain_quant_scal1[qe],ol_gain);
854 /*printf ("%f %f\n", ener, ol_gain);*/
856 /* Normalize innovation */
857 signal_div(target, target, ener, st->subframeSize);
859 /* Quantize innovation */
860 if (SUBMODE(innovation_quant))
862 /* Codebook search */
863 SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
864 SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
865 innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook));
867 /* De-normalize innovation and update excitation */
868 signal_mul(innov, innov, ener, st->subframeSize);
870 for (i=0;i<st->subframeSize;i++)
873 speex_error("No fixed codebook");
876 /* In some (rare) modes, we do a second search (more bits) to reduce noise even more */
877 if (SUBMODE(double_codebook)) {
/* innov2 is pushed on a private copy of the stack pointer, so the caller's `stack` is not advanced. */
878 char *tmp_stack=stack;
879 spx_sig_t *innov2 = PUSH(tmp_stack, st->subframeSize, spx_sig_t);
880 for (i=0;i<st->subframeSize;i++)
882 for (i=0;i<st->subframeSize;i++)
884 SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
885 SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
886 innov2, syn_resp, bits, tmp_stack, st->complexity, 0);
/* The second codebook contribution is scaled by ener/2.2. */
887 signal_mul(innov2, innov2, (spx_word32_t) (ener*(1/2.2)), st->subframeSize);
888 for (i=0;i<st->subframeSize;i++)
892 /* FIXME: I can remove that, right? */
893 /*signal_mul(target, target, ener, st->subframeSize);*/
896 /* Final signal synthesis from excitation */
897 iir_mem2(exc, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, st->mem_sp);
899 /* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */
900 if (st->complexity!=0)
901 filter_mem2(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw);
903 for (i=0;i<st->subframeSize;i++)
907 /* Store the LSPs for interpolation in the next frame */
908 if (st->submodeID>=1)
910 for (i=0;i<st->lpcSize;i++)
911 st->old_lsp[i] = st->lsp[i];
912 for (i=0;i<st->lpcSize;i++)
913 st->old_qlsp[i] = st->qlsp[i];
916 if (st->submodeID==1)
919 speex_bits_pack(bits, 15, 4);
921 speex_bits_pack(bits, 0, 4);
924 /* The next frame will not be the first (Duh!) */
927 /* Replace input by synthesized speech */
928 for (i=0;i<st->frameSize;i++)
930 spx_word32_t sig = PSHR(st->frame[i],SIG_SHIFT);
/* Decide whether the next frame's pitch search must stay inside the current frame: that is
   requested after a noise-codebook frame or the null sub-mode.
   NOTE(review): the `else` between the two assignments is not visible here - confirm. */
938 if (SUBMODE(innovation_quant) == noise_codebook_quant || st->submodeID==0)
939 st->bounded_pitch = 1;
941 st->bounded_pitch = 0;
/* Create and initialise a narrowband decoder state for mode `m`.
 *
 * The state structure and its scratch area are obtained with a single speex_alloc() call;
 * every buffer set up below is obtained through PUSH/PUSHS on st->stack, which starts right
 * after the DecState structure.
 *
 * NOTE(review): local declarations, the allocation-failure check, the bodies of the buffer
 * clearing loops and the return statement are not visible in this view - confirm them
 * against the full file.
 */
947 void *nb_decoder_init(const SpeexMode *m)
950 const SpeexNBMode *mode;
953 mode=(SpeexNBMode*)m->mode;
/* One allocation: the state itself plus room for 4000 spx_sig_t of scratch space. */
954 st = (DecState *)speex_alloc(sizeof(DecState)+4000*sizeof(spx_sig_t));
957 st->stack = ((char*)st) + sizeof(DecState);
959 st->encode_submode = 1;
961 st->lbr_48k=mode->lbr48k;
965 /* Codec parameters, should eventually have several "modes"*/
966 st->frameSize = mode->frameSize;
967 st->windowSize = st->frameSize*3/2;
968 st->nbSubframes=mode->frameSize/mode->subframeSize;
969 st->subframeSize=mode->subframeSize;
970 st->lpcSize = mode->lpcSize;
971 st->bufSize = mode->bufSize;
972 st->min_pitch=mode->pitchStart;
973 st->max_pitch=mode->pitchEnd;
975 st->submodes=mode->submodes;
976 st->submodeID=mode->defaultSubmode;
/* The perceptual enhancement filter starts disabled. */
978 st->lpc_enh_enabled=0;
/* frame and exc address the last windowSize samples of their buffers. */
981 st->inBuf = PUSH(st->stack, st->bufSize, spx_sig_t);
982 st->frame = st->inBuf + st->bufSize - st->windowSize;
983 st->excBuf = PUSH(st->stack, st->bufSize, spx_sig_t);
984 st->exc = st->excBuf + st->bufSize - st->windowSize;
985 for (i=0;i<st->bufSize;i++)
987 for (i=0;i<st->bufSize;i++)
989 st->innov = PUSH(st->stack, st->frameSize, spx_sig_t);
991 st->interp_qlpc = PUSH(st->stack, st->lpcSize+1, spx_coef_t);
992 st->qlsp = PUSH(st->stack, st->lpcSize, spx_lsp_t);
993 st->old_qlsp = PUSH(st->stack, st->lpcSize, spx_lsp_t);
994 st->interp_qlsp = PUSH(st->stack, st->lpcSize, spx_lsp_t);
/* Room for five filter memories of lpcSize entries; the enhancement filters in the decode
   paths address this array at an offset of lpcSize. */
995 st->mem_sp = PUSH(st->stack, 5*st->lpcSize, spx_mem_t);
996 st->comb_mem = PUSHS(st->stack, CombFilterMem);
997 comb_filter_mem_init (st->comb_mem);
999 st->pi_gain = PUSH(st->stack, st->nbSubframes, spx_word32_t);
/* Defaults used by packet-loss concealment before any frame has been decoded. */
1000 st->last_pitch = 40;
1002 st->pitch_gain_buf[0] = st->pitch_gain_buf[1] = st->pitch_gain_buf[2] = 0;
1003 st->pitch_gain_buf_idx = 0;
1005 st->sampling_rate=8000;
1006 st->last_ol_gain = 0;
/* In-band request handlers: a default user handler, and no Speex callbacks registered. */
1008 st->user_callback.func = &speex_default_user_handler;
1009 st->user_callback.data = NULL;
1011 st->speex_callbacks[i].func = NULL;
1013 st->voc_m1=st->voc_m2=st->voc_mean=0;
/* When built with ENABLE_VALGRIND, mark the region from the state start up to the current stack pointer as readable. */
1016 #ifdef ENABLE_VALGRIND
1017 VALGRIND_MAKE_READABLE(st, (st->stack-(char*)st));
/* Tear down a decoder state. Only the cast of `state` to DecState is visible in this view.
 * NOTE(review): the statement that releases the state is not visible - confirm against the
 * full file.
 */
1022 void nb_decoder_destroy(void *state)
1025 st=(DecState*)state;
/* Median of three values, using only the `<` comparison. Each argument may be evaluated
   more than once, so arguments must be free of side effects. */
1030 #define median3(a, b, c) ((a) < (b) ? ((b) < (c) ? (b) : ((a) < (c) ? (c) : (a))) : ((c) < (b) ? (b) : ((c) < (a) ? (c) : (a))))
/* Packet-loss concealment: synthesize one frame without any received bits.
 *
 * st:    decoder state; its history buffers, filter memories and pitch-gain history are updated.
 * out:   output samples (the lines that write it are not visible in this view).
 * stack: scratch stack used for the three enhancement-filter coefficient arrays.
 *
 * The excitation is rebuilt from the past excitation delayed by st->last_pitch and scaled
 * by pitch_gain, plus an innovation/noise term attenuated by `fact`, and is then passed
 * through the synthesis filter built from st->interp_qlpc.
 *
 * NOTE(review): several declarations, braces, conditional-compilation lines and the
 * pitch_gain limiting are not visible here - confirm against the full file.
 */
1032 static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack)
1035 spx_coef_t *awk1, *awk2, *awk3;
1036 float pitch_gain, fact;
1037 spx_word16_t gain_med;
/* Attenuation that shrinks with the square of the number of consecutive lost frames. */
1039 fact = exp(-.04*st->count_lost*st->count_lost);
/* Never use a pitch gain above the median of the last three stored gains. */
1040 gain_med = median3(st->pitch_gain_buf[0], st->pitch_gain_buf[1], st->pitch_gain_buf[2]);
1041 if (gain_med < st->last_pitch_gain)
1042 st->last_pitch_gain = gain_med;
1044 pitch_gain = GAIN_SCALING_1*st->last_pitch_gain;
1050 /* Shift all buffers by one frame */
1051 speex_move(st->inBuf, st->inBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(spx_sig_t));
1052 speex_move(st->excBuf, st->excBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(spx_sig_t));
1054 awk1=PUSH(stack, (st->lpcSize+1), spx_coef_t);
1055 awk2=PUSH(stack, (st->lpcSize+1), spx_coef_t);
1056 awk3=PUSH(stack, (st->lpcSize+1), spx_coef_t);
1058 for (sub=0;sub<st->nbSubframes;sub++)
1061 spx_sig_t *sp, *exc;
1062 /* Offset relative to start of frame */
1063 offset = st->subframeSize*sub;
1064 /* Original signal */
1065 sp=st->frame+offset;
1068 /* Excitation after post-filter*/
1070 /* Calculate perceptually enhanced LPC filter */
1071 if (st->lpc_enh_enabled)
1073 spx_word16_t k1,k2,k3;
/* Use the sub-mode's enhancement factors when a sub-mode is selected; the null sub-mode
   falls back to fixed factors. */
1074 if (st->submodes[st->submodeID] != NULL)
1076 k1=SUBMODE(lpc_enh_k1);
1077 k2=SUBMODE(lpc_enh_k2);
1078 k3=SUBMODE(lpc_enh_k3);
1080 k1=k2=.7*GAMMA_SCALING;
1083 bw_lpc(k1, st->interp_qlpc, awk1, st->lpcSize);
1084 bw_lpc(k2, st->interp_qlpc, awk2, st->lpcSize);
1085 bw_lpc(k3, st->interp_qlpc, awk3, st->lpcSize);
/* NOTE(review): two forms of the excitation update appear below (one using st->innov, one
   using speex_rand() at the RMS level of st->innov); presumably they are alternatives
   chosen by a conditional that is not visible - confirm. */
1088 /* Make up a plausible excitation */
1089 /* FIXME: THIS CAN BE IMPROVED */
1090 /*if (pitch_gain>.95)
1094 innov_gain = compute_rms(st->innov, st->frameSize);
1095 for (i=0;i<st->subframeSize;i++)
1098 exc[i] = pitch_gain * exc[i - st->last_pitch] + fact*sqrt(1-pitch_gain)*st->innov[i+offset];
1099 /*Just so it give the same lost packets as with if 0*/
1102 /*exc[i]=pitch_gain*exc[i-st->last_pitch] + fact*st->innov[i+offset];*/
1103 exc[i]=pitch_gain*exc[i-st->last_pitch] +
1104 fact*sqrt(1-pitch_gain)*speex_rand(innov_gain);
1108 for (i=0;i<st->subframeSize;i++)
1111 /* Signal synthesis */
1112 if (st->lpc_enh_enabled)
1114 filter_mem2(sp, awk2, awk1, sp, st->subframeSize, st->lpcSize,
1115 st->mem_sp+st->lpcSize);
1116 filter_mem2(sp, awk3, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
/* Without enhancement the second memory bank (offset lpcSize) is cleared and plain
   synthesis filtering is used. */
1119 for (i=0;i<st->lpcSize;i++)
1120 st->mem_sp[st->lpcSize+i] = 0;
1121 iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
1126 for (i=0;i<st->frameSize;i++)
1128 spx_word32_t sig = PSHR(st->frame[i],SIG_SHIFT);
/* Record the gain that was used in the 3-entry circular pitch-gain history. */
1138 st->pitch_gain_buf[st->pitch_gain_buf_idx++] = GAIN_SCALING*pitch_gain;
1139 if (st->pitch_gain_buf_idx > 2) /* rollover */
1140 st->pitch_gain_buf_idx = 0;
1143 int nb_decode(void *state, SpeexBits *bits, void *vout)
1148 spx_word16_t pitch_gain[3];
1149 spx_word32_t ol_gain=0;
1151 spx_word16_t ol_pitch_coef=0;
1153 spx_word16_t best_pitch_gain=0;
1157 spx_coef_t *awk1, *awk2, *awk3;
1158 spx_word16_t pitch_average=0;
1163 spx_word16_t *out = vout;
1165 st=(DecState*)state;
1168 if (st->encode_submode)
1174 /* Check if we're in DTX mode*/
1175 if (!bits && st->dtx_enabled)
1180 /* If bits is NULL, consider the packet to be lost (what could we do anyway) */
1183 nb_decode_lost(st, out, stack);
1187 /* Search for next narrowband block (handle requests, skip wideband blocks) */
1189 if (speex_bits_remaining(bits)<5)
1191 wideband = speex_bits_unpack_unsigned(bits, 1);
1192 if (wideband) /* Skip wideband block (for compatibility) */
1196 advance = submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS);
1197 speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance);
1200 speex_warning ("Invalid wideband mode encountered. Corrupted stream?");
1203 advance -= (SB_SUBMODE_BITS+1);
1204 speex_bits_advance(bits, advance);
1206 if (speex_bits_remaining(bits)<5)
1208 wideband = speex_bits_unpack_unsigned(bits, 1);
1211 advance = submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS);
1212 speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance);
1215 speex_warning ("Invalid wideband mode encountered: corrupted stream?");
1218 advance -= (SB_SUBMODE_BITS+1);
1219 speex_bits_advance(bits, advance);
1220 wideband = speex_bits_unpack_unsigned(bits, 1);
1223 speex_warning ("More than two wideband layers found: corrupted stream?");
1229 if (speex_bits_remaining(bits)<4)
1231 /* FIXME: Check for overflow */
1232 m = speex_bits_unpack_unsigned(bits, 4);
1233 if (m==15) /* We found a terminator */
1236 } else if (m==14) /* Speex in-band request */
1238 int ret = speex_inband_handler(bits, st->speex_callbacks, state);
1241 } else if (m==13) /* User in-band request */
1243 int ret = st->user_callback.func(bits, state, st->user_callback.data);
1246 } else if (m>8) /* Invalid mode */
1248 speex_warning("Invalid mode encountered: corrupted stream?");
1254 /* Get the sub-mode that was used */
1263 /* Shift all buffers by one frame */
1264 speex_move(st->inBuf, st->inBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(spx_sig_t));
1265 speex_move(st->excBuf, st->excBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(spx_sig_t));
1267 /* If null mode (no transmission), just set a couple things to zero*/
1268 if (st->submodes[st->submodeID] == NULL)
1271 lpc = PUSH(stack,11, spx_coef_t);
1272 bw_lpc(GAMMA_SCALING*.93, st->interp_qlpc, lpc, 10);
1273 /*for (i=0;i<st->frameSize;i++)
1277 float pgain=GAIN_SCALING_1*st->last_pitch_gain;
1280 innov_gain = compute_rms(st->innov, st->frameSize);
1281 for (i=0;i<st->frameSize;i++)
1283 speex_rand_vec(innov_gain, st->exc, st->frameSize);
1289 /* Final signal synthesis from excitation */
1290 iir_mem2(st->exc, lpc, st->frame, st->frameSize, st->lpcSize, st->mem_sp);
1292 for (i=0;i<st->frameSize;i++)
1294 spx_word32_t sig = PSHR(st->frame[i],SIG_SHIFT);
1306 /* Unquantize LSPs */
1307 SUBMODE(lsp_unquant)(st->qlsp, st->lpcSize, bits);
1309 /*Damp memory if a frame was lost and the LSP changed too much*/
1312 float lsp_dist=0, fact;
1313 for (i=0;i<st->lpcSize;i++)
1314 lsp_dist += fabs(st->old_qlsp[i] - st->qlsp[i]);
1315 lsp_dist /= LSP_SCALING*LSP_SCALING;
1316 fact = .6*exp(-.2*lsp_dist);
1317 for (i=0;i<2*st->lpcSize;i++)
1318 st->mem_sp[i] = (spx_mem_t)(st->mem_sp[i]*fact);
1322 /* Handle first frame and lost-packet case */
1323 if (st->first || st->count_lost)
1325 for (i=0;i<st->lpcSize;i++)
1326 st->old_qlsp[i] = st->qlsp[i];
1331 pitch_half[0] = st->min_pitch+speex_bits_unpack_unsigned(bits, 7);
1332 pitch_half[1] = pitch_half[0]+speex_bits_unpack_unsigned(bits, 2)-1;
1334 ol_pitch_id = speex_bits_unpack_unsigned(bits, 3);
1335 ol_pitch_coef=GAIN_SCALING*0.13514*ol_pitch_id;
1339 qe = speex_bits_unpack_unsigned(bits, 4);
1340 ol_gain = SIG_SCALING*exp((qe+2)/2.1),SIG_SHIFT;
1346 /* Get open-loop pitch estimation for low bit-rate pitch coding */
1347 if (SUBMODE(lbr_pitch)!=-1)
1349 ol_pitch = st->min_pitch+speex_bits_unpack_unsigned(bits, 7);
1352 if (SUBMODE(forced_pitch_gain))
1355 quant = speex_bits_unpack_unsigned(bits, 4);
1356 ol_pitch_coef=GAIN_SCALING*0.066667*quant;
1359 /* Get global excitation gain */
1362 qe = speex_bits_unpack_unsigned(bits, 5);
1364 ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]);
1366 ol_gain = SIG_SCALING*exp(qe/3.5);
1373 awk1=PUSH(stack, st->lpcSize+1, spx_coef_t);
1374 awk2=PUSH(stack, st->lpcSize+1, spx_coef_t);
1375 awk3=PUSH(stack, st->lpcSize+1, spx_coef_t);
1377 if (st->submodeID==1)
1380 extra = speex_bits_unpack_unsigned(bits, 4);
1387 if (st->submodeID>1)
1390 /*Loop on subframes */
1391 for (sub=0;sub<st->nbSubframes;sub++)
1394 spx_sig_t *sp, *exc;
1400 if (sub*2 < st->nbSubframes)
1401 ol_pitch = pitch_half[0];
1403 ol_pitch = pitch_half[1];
1407 /* Offset relative to start of frame */
1408 offset = st->subframeSize*sub;
1409 /* Original signal */
1410 sp=st->frame+offset;
1413 /* Excitation after post-filter*/
1415 /* LSP interpolation (quantized and unquantized) */
1416 lsp_interpolate(st->old_qlsp, st->qlsp, st->interp_qlsp, st->lpcSize, sub, st->nbSubframes);
1418 /* Make sure the LSP's are stable */
1419 lsp_enforce_margin(st->interp_qlsp, st->lpcSize, LSP_MARGIN);
1422 /* Compute interpolated LPCs (unquantized) */
1423 lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, stack);
1425 /* Compute enhanced synthesis filter */
1426 if (st->lpc_enh_enabled)
1428 bw_lpc(SUBMODE(lpc_enh_k1), st->interp_qlpc, awk1, st->lpcSize);
1429 bw_lpc(SUBMODE(lpc_enh_k2), st->interp_qlpc, awk2, st->lpcSize);
1430 bw_lpc(SUBMODE(lpc_enh_k3), st->interp_qlpc, awk3, st->lpcSize);
1433 /* Compute analysis filter at w=pi */
1435 spx_word32_t pi_g=st->interp_qlpc[0];
1436 for (i=1;i<=st->lpcSize;i+=2)
1438 pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];
1440 st->pi_gain[sub] = pi_g;
1443 /* Reset excitation */
1444 for (i=0;i<st->subframeSize;i++)
1447 /*Adaptive codebook contribution*/
1448 if (SUBMODE(ltp_unquant))
1450 int pit_min, pit_max;
1451 /* Handle pitch constraints if any */
1452 if (SUBMODE(lbr_pitch) != -1)
1455 margin = SUBMODE(lbr_pitch);
1458 /* GT - need optimization?
1459 if (ol_pitch < st->min_pitch+margin-1)
1460 ol_pitch=st->min_pitch+margin-1;
1461 if (ol_pitch > st->max_pitch-margin)
1462 ol_pitch=st->max_pitch-margin;
1463 pit_min = ol_pitch-margin+1;
1464 pit_max = ol_pitch+margin;
1466 pit_min = ol_pitch-margin+1;
1467 if (pit_min < st->min_pitch)
1468 pit_min = st->min_pitch;
1469 pit_max = ol_pitch+margin;
1470 if (pit_max > st->max_pitch)
1471 pit_max = st->max_pitch;
1473 pit_min = pit_max = ol_pitch;
1476 pit_min = st->min_pitch;
1477 pit_max = st->max_pitch;
1484 SUBMODE(ltp_unquant)(exc, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params),
1485 st->subframeSize, &pitch, &pitch_gain[0], bits, stack,
1486 st->count_lost, offset, st->last_pitch_gain, ol_pitch_id);
1490 SUBMODE(ltp_unquant)(exc, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params),
1491 st->subframeSize, &pitch, &pitch_gain[0], bits, stack,
1492 st->count_lost, offset, st->last_pitch_gain, 0);
1499 /* If we had lost frames, check energy of last received frame */
1500 if (st->count_lost && ol_gain < st->last_ol_gain)
1502 float fact = (float)ol_gain/(st->last_ol_gain+1);
1503 for (i=0;i<st->subframeSize;i++)
1507 tmp = gain_3tap_to_1tap(pitch_gain);
1509 pitch_average += tmp;
1510 if (tmp>best_pitch_gain)
1513 best_pitch_gain = tmp;
1516 speex_error("No pitch prediction, what's wrong");
1519 /* Unquantize the innovation */
1525 innov = st->innov+sub*st->subframeSize;
1526 for (i=0;i<st->subframeSize;i++)
1529 /* Decode sub-frame gain correction */
1530 if (SUBMODE(have_subframe_gain)==3)
1532 q_energy = speex_bits_unpack_unsigned(bits, 3);
1533 ener = MULT16_32_Q14(exc_gain_quant_scal3[q_energy],ol_gain);
1534 } else if (SUBMODE(have_subframe_gain)==1)
1536 q_energy = speex_bits_unpack_unsigned(bits, 1);
1537 ener = MULT16_32_Q14(exc_gain_quant_scal1[q_energy],ol_gain);
1542 if (SUBMODE(innovation_unquant))
1544 /*Fixed codebook contribution*/
1545 SUBMODE(innovation_unquant)(innov, SUBMODE(innovation_params), st->subframeSize, bits, stack);
1547 speex_error("No fixed codebook");
1550 /* De-normalize innovation and update excitation */
1552 signal_mul(innov, innov, ener, st->subframeSize);
1554 signal_mul(innov, innov, ener, st->subframeSize);
1557 if (st->submodeID==1)
1559 float g=ol_pitch_coef*GAIN_SCALING_1;
1562 for (i=0;i<st->subframeSize;i++)
1564 while (st->voc_offset<st->subframeSize)
1566 if (st->voc_offset>=0)
1567 exc[st->voc_offset]=SIG_SCALING*sqrt(1.0*ol_pitch);
1568 st->voc_offset+=ol_pitch;
1570 st->voc_offset -= st->subframeSize;
1577 for (i=0;i<st->subframeSize;i++)
1580 exc[i]=.8*g*exc[i]*ol_gain/SIG_SCALING + .6*g*st->voc_m1*ol_gain/SIG_SCALING + .5*g*innov[i] - .5*g*st->voc_m2 + (1-g)*innov[i];
1582 st->voc_m2=innov[i];
1583 st->voc_mean = .95*st->voc_mean + .05*exc[i];
1584 exc[i]-=st->voc_mean;
1587 for (i=0;i<st->subframeSize;i++)
1589 /*print_vec(exc, 40, "innov");*/
1591 /* Decode second codebook (only for some modes) */
1592 if (SUBMODE(double_codebook))
1594 char *tmp_stack=stack;
1595 spx_sig_t *innov2 = PUSH(tmp_stack, st->subframeSize, spx_sig_t);
1596 for (i=0;i<st->subframeSize;i++)
1598 SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, tmp_stack);
1599 signal_mul(innov2, innov2, (spx_word32_t) (ener*(1/2.2)), st->subframeSize);
1600 for (i=0;i<st->subframeSize;i++)
1601 exc[i] += innov2[i];
1606 for (i=0;i<st->subframeSize;i++)
1609 /* Signal synthesis */
1610 if (st->lpc_enh_enabled && SUBMODE(comb_gain)>0)
1611 comb_filter(exc, sp, st->interp_qlpc, st->lpcSize, st->subframeSize,
1612 pitch, pitch_gain, SUBMODE(comb_gain), st->comb_mem);
1614 if (st->lpc_enh_enabled)
1616 /* Use enhanced LPC filter */
1617 filter_mem2(sp, awk2, awk1, sp, st->subframeSize, st->lpcSize,
1618 st->mem_sp+st->lpcSize);
1619 filter_mem2(sp, awk3, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
1622 /* Use regular filter */
1623 for (i=0;i<st->lpcSize;i++)
1624 st->mem_sp[st->lpcSize+i] = 0;
1625 iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
1630 /*Copy output signal*/
1631 for (i=0;i<st->frameSize;i++)
1633 spx_word32_t sig = PSHR(st->frame[i],SIG_SHIFT);
1641 /*for (i=0;i<st->frameSize;i++)
1642 printf ("%d\n", (int)st->frame[i]);*/
1644 /* Store the LSPs for interpolation in the next frame */
1645 for (i=0;i<st->lpcSize;i++)
1646 st->old_qlsp[i] = st->qlsp[i];
1648 /* The next frame will not be the first (Duh!) */
1651 st->last_pitch = best_pitch;
1653 st->last_pitch_gain = PSHR(pitch_average,2);
1655 st->last_pitch_gain = .25*pitch_average;
1657 st->pitch_gain_buf[st->pitch_gain_buf_idx++] = st->last_pitch_gain;
1658 if (st->pitch_gain_buf_idx > 2) /* rollover */
1659 st->pitch_gain_buf_idx = 0;
1661 st->last_ol_gain = ol_gain;
/* nb_encoder_ctl: read or change one setting of the narrowband encoder.
 *
 *   state   - opaque encoder handle; it is cast to EncState* below.
 *   request - SPEEX_* request code selecting the setting.
 *   ptr     - in/out value; it is dereferenced as int*, float*,
 *             spx_word32_t* or spx_sig_t* depending on the request.
 *
 * Requests that match no case are reported through speex_warning_int().
 *
 * NOTE(review): the source line numbers embedded in this listing skip, so
 * some statements (braces, the switch header, break statements, a few case
 * labels and the return statement) are not shown here. Comments below that
 * depend on a line that is not shown are marked as assumptions.
 */
1666 int nb_encoder_ctl(void *state, int request, void *ptr)
1669 st=(EncState*)state;
/* Report the frame size stored in the state. */
1672 case SPEEX_GET_FRAME_SIZE:
1673 (*(int*)ptr) = st->frameSize;
/* Both "set mode" requests write the same value to submodeSelect and submodeID. */
1675 case SPEEX_SET_LOW_MODE:
1676 case SPEEX_SET_MODE:
1677 st->submodeSelect = st->submodeID = (*(int*)ptr);
/* Both "get mode" requests report submodeID (not submodeSelect). */
1679 case SPEEX_GET_LOW_MODE:
1680 case SPEEX_GET_MODE:
1681 (*(int*)ptr) = st->submodeID;
/* NOTE(review): the case labels for the next six assignments are not shown in
   this listing; judging by the fields they are presumably the set/get pairs
   for the VBR, VAD and DTX flags - confirm against the full source. */
1684 st->vbr_enabled = (*(int*)ptr);
1687 (*(int*)ptr) = st->vbr_enabled;
1690 st->vad_enabled = (*(int*)ptr);
1693 (*(int*)ptr) = st->vad_enabled;
1696 st->dtx_enabled = (*(int*)ptr);
1699 (*(int*)ptr) = st->dtx_enabled;
/* NOTE(review): case label not shown (presumably the ABR setter - confirm).
   The int at ptr is stored in abr_enabled and VBR is forced on. */
1702 st->abr_enabled = (*(int*)ptr);
1703 st->vbr_enabled = 1;
/* The same int at ptr is then used as a target that is compared (in lines not
   shown) with the bit-rate reported after applying a quality setting; i starts
   at 10. The loop control around the two calls below is not visible here. */
1705 int i=10, rate, target;
1707 target = (*(int*)ptr);
1710 speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
1711 speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
/* vbr_qual is declared and computed in lines not shown in this listing. */
1719 speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &vbr_qual);
/* NOTE(review): case label not shown (presumably the ABR getter - confirm). */
1727 (*(int*)ptr) = st->abr_enabled;
/* VBR quality is exchanged as a float, unlike most other settings here. */
1729 case SPEEX_SET_VBR_QUALITY:
1730 st->vbr_quality = (*(float*)ptr);
1732 case SPEEX_GET_VBR_QUALITY:
1733 (*(float*)ptr) = st->vbr_quality;
/* Translate a quality index into a sub-mode through the mode's quality_map.
   NOTE(review): any range check on `quality` would be in lines 1738-1741 of
   the original, which are not shown - confirm the index is bounded. */
1735 case SPEEX_SET_QUALITY:
1737 int quality = (*(int*)ptr);
1742 st->submodeSelect = st->submodeID = ((SpeexNBMode*)(st->mode->mode))->quality_map[quality];
/* Store the complexity; the statement guarded by the <0 test is not shown. */
1745 case SPEEX_SET_COMPLEXITY:
1746 st->complexity = (*(int*)ptr);
1747 if (st->complexity<0)
1750 case SPEEX_GET_COMPLEXITY:
1751 (*(int*)ptr) = st->complexity;
/* Same probing pattern as above: apply a quality, read back the bit-rate and
   compare with the target; the loop control is not shown in this listing. */
1753 case SPEEX_SET_BITRATE:
1755 int i=10, rate, target;
1756 target = (*(int*)ptr);
1759 speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
1760 speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
/* Bit-rate = sampling_rate * bits-per-frame / frameSize. SUBMODE(x) expands to
   st->submodes[st->submodeID]->x, hence the null check first. The second
   assignment is presumably the else branch (the `else` line is not shown) and
   uses NB_SUBMODE_BITS+1 as the bit count. */
1767 case SPEEX_GET_BITRATE:
1768 if (st->submodes[st->submodeID])
1769 (*(int*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
1771 (*(int*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
1773 case SPEEX_SET_SAMPLING_RATE:
1774 st->sampling_rate = (*(int*)ptr);
1776 case SPEEX_GET_SAMPLING_RATE:
1777 (*(int*)ptr)=st->sampling_rate;
/* Reset: set bounded_pitch, re-initialise lsp[] to the evenly spaced values
   M_PI*(i+1)/(lpcSize+1), then zero the first lpcSize entries of the four
   filter memories and all bufSize entries of the four signal buffers. */
1779 case SPEEX_RESET_STATE:
1782 st->bounded_pitch = 1;
1784 for (i=0;i<st->lpcSize;i++)
1785 st->lsp[i]=(M_PI*((float)(i+1)))/(st->lpcSize+1);
1786 for (i=0;i<st->lpcSize;i++)
1787 st->mem_sw[i]=st->mem_sw_whole[i]=st->mem_sp[i]=st->mem_exc[i]=0;
1788 for (i=0;i<st->bufSize;i++)
1789 st->excBuf[i]=st->swBuf[i]=st->inBuf[i]=st->exc2Buf[i]=0;
1792 case SPEEX_SET_SUBMODE_ENCODING:
1793 st->encode_submode = (*(int*)ptr);
1795 case SPEEX_GET_SUBMODE_ENCODING:
1796 (*(int*)ptr) = st->encode_submode;
/* Look-ahead is reported as the difference between window and frame size. */
1798 case SPEEX_GET_LOOKAHEAD:
1799 (*(int*)ptr)=(st->windowSize-st->frameSize);
/* Copy the nbSubframes entries of pi_gain into the caller's array, so ptr must
   point to at least nbSubframes spx_word32_t values. */
1801 case SPEEX_GET_PI_GAIN:
1804 spx_word32_t *g = (spx_word32_t*)ptr;
1805 for (i=0;i<st->nbSubframes;i++)
1806 g[i]=st->pi_gain[i];
/* NOTE(review): the case label and the loop body of this frameSize-long copy
   into a spx_sig_t array are not shown in this listing. */
1812 spx_sig_t *e = (spx_sig_t*)ptr;
1813 for (i=0;i<st->frameSize;i++)
/* frameSize-long loop over a spx_sig_t array at ptr; loop body not shown. */
1817 case SPEEX_GET_INNOV:
1820 spx_sig_t *e = (spx_sig_t*)ptr;
1821 for (i=0;i<st->frameSize;i++)
1825 case SPEEX_GET_RELATIVE_QUALITY:
1826 (*(float*)ptr)=st->relative_quality;
/* Unrecognised request: emit a warning carrying the request code
   (the enclosing default label is not shown in this listing). */
1829 speex_warning_int("Unknown nb_ctl request: ", request);
/* nb_decoder_ctl: read or change one setting of the narrowband decoder.
 *
 *   state   - opaque decoder handle; it is cast to DecState* below.
 *   request - SPEEX_* request code selecting the setting.
 *   ptr     - in/out value; it is dereferenced as int*, SpeexCallback*,
 *             spx_word32_t* or spx_sig_t* depending on the request.
 *
 * Requests that match no case are reported through speex_warning_int().
 *
 * NOTE(review): the source line numbers embedded in this listing skip, so
 * some statements (braces, the switch header, break statements, a few case
 * labels and the return statement) are not shown here. Comments below that
 * depend on a line that is not shown are marked as assumptions.
 */
1835 int nb_decoder_ctl(void *state, int request, void *ptr)
1838 st=(DecState*)state;
/* Both "set mode" requests overwrite submodeID with the int at ptr. */
1841 case SPEEX_SET_LOW_MODE:
1842 case SPEEX_SET_MODE:
1843 st->submodeID = (*(int*)ptr);
1845 case SPEEX_GET_LOW_MODE:
1846 case SPEEX_GET_MODE:
1847 (*(int*)ptr) = st->submodeID;
/* NOTE(review): the case labels for the next two statements are not shown;
   they are presumably the set/get pair for the enhancement flag - confirm. */
1850 st->lpc_enh_enabled = *((int*)ptr);
1853 *((int*)ptr) = st->lpc_enh_enabled;
1855 case SPEEX_GET_FRAME_SIZE:
1856 (*(int*)ptr) = st->frameSize;
/* Bit-rate = sampling_rate * bits-per-frame / frameSize. SUBMODE(x) expands to
   st->submodes[st->submodeID]->x, hence the null check first. The second
   assignment is presumably the else branch (the `else` line is not shown) and
   uses NB_SUBMODE_BITS+1 as the bit count. */
1858 case SPEEX_GET_BITRATE:
1859 if (st->submodes[st->submodeID])
1860 (*(int*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
1862 (*(int*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
1864 case SPEEX_SET_SAMPLING_RATE:
1865 st->sampling_rate = (*(int*)ptr);
1867 case SPEEX_GET_SAMPLING_RATE:
1868 (*(int*)ptr)=st->sampling_rate;
/* Install a handler: ptr is a SpeexCallback whose fields are copied into the
   speex_callbacks slot selected by its own callback_id.
   NOTE(review): no range check on c->callback_id is visible in this listing;
   confirm it cannot index past the end of speex_callbacks. */
1870 case SPEEX_SET_HANDLER:
1872 SpeexCallback *c = (SpeexCallback*)ptr;
1873 st->speex_callbacks[c->callback_id].func=c->func;
1874 st->speex_callbacks[c->callback_id].data=c->data;
1875 st->speex_callbacks[c->callback_id].callback_id=c->callback_id;
/* Install the single user callback by copying all three fields. */
1878 case SPEEX_SET_USER_HANDLER:
1880 SpeexCallback *c = (SpeexCallback*)ptr;
1881 st->user_callback.func=c->func;
1882 st->user_callback.data=c->data;
1883 st->user_callback.callback_id=c->callback_id;
/* Reset: the body of the first loop (2*lpcSize iterations) is not shown in
   this listing; the second loop zeroes all bufSize entries of excBuf and inBuf. */
1886 case SPEEX_RESET_STATE:
1889 for (i=0;i<2*st->lpcSize;i++)
1891 for (i=0;i<st->bufSize;i++)
1892 st->excBuf[i]=st->inBuf[i]=0;
1895 case SPEEX_SET_SUBMODE_ENCODING:
1896 st->encode_submode = (*(int*)ptr);
1898 case SPEEX_GET_SUBMODE_ENCODING:
1899 (*(int*)ptr) = st->encode_submode;
/* Copy the nbSubframes entries of pi_gain into the caller's array, so ptr must
   point to at least nbSubframes spx_word32_t values. */
1901 case SPEEX_GET_PI_GAIN:
1904 spx_word32_t *g = (spx_word32_t*)ptr;
1905 for (i=0;i<st->nbSubframes;i++)
1906 g[i]=st->pi_gain[i];
/* NOTE(review): the case label and the loop body of this frameSize-long copy
   into a spx_sig_t array are not shown in this listing. */
1912 spx_sig_t *e = (spx_sig_t*)ptr;
1913 for (i=0;i<st->frameSize;i++)
/* frameSize-long loop over a spx_sig_t array at ptr; loop body not shown. */
1917 case SPEEX_GET_INNOV:
1920 spx_sig_t *e = (spx_sig_t*)ptr;
1921 for (i=0;i<st->frameSize;i++)
/* The DTX status request reports the dtx_enabled field of the decoder state. */
1925 case SPEEX_GET_DTX_STATUS:
1926 *((int*)ptr) = st->dtx_enabled;
/* Unrecognised request: emit a warning carrying the request code
   (the enclosing default label is not shown in this listing). */
1929 speex_warning_int("Unknown nb_ctl request: ", request);