/* Encoder functions */
/****************************************/
-opus_int silk_Get_Encoder_Size( int *encSizeBytes )
+opus_int silk_Get_Encoder_Size( opus_int *encSizeBytes )
{
opus_int ret = SILK_NO_ERROR;
opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol;
silk_encoder *psEnc = ( silk_encoder * )encState;
opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ + MAX_ENCODER_DELAY];
- opus_int transition, delay;
+ opus_int transition, delay, curr_block, tot_blocks;
psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
return ret;
}
+ encControl->switchReady = 0;
+
if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
/* Mono -> Stereo transition: init state of second channel and stereo state */
ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ] );
silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) );
silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) );
- silk_memset( psEnc->sStereo.mid_side_amp_Q0, 0, sizeof( psEnc->sStereo.mid_side_amp_Q0 ) );
+ psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0;
+ psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1;
+ psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0;
+ psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1;
psEnc->sStereo.width_prev_Q14 = 0;
psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 );
if( psEnc->nChannelsAPI == 2 ) {
psEnc->nChannelsInternal = encControl->nChannelsInternal;
nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate );
+ tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1;
+ curr_block = 0;
if( prefillFlag ) {
/* Only accept input length of 10 ms */
if( nBlocksOf10ms != 1 ) {
TargetRate_bps = silk_RSHIFT32( encControl->bitRate, encControl->nChannelsInternal - 1 );
for( n = 0; n < encControl->nChannelsInternal; n++ ) {
- /* JMV: Force the side channel to the same rate as the mid. Is this the right way? */
- int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
+ /* Force the side channel to the same rate as the mid */
+ opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {
silk_assert( 0 );
return ret;
}
- if (psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition)
- {
+ if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) {
for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0;
}
}
+ psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX;
}
silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
/* Resample and write to buffer */
if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
- int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
+ opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
for( n = 0; n < nSamplesFromInput; n++ ) {
- buf[ n+delay ] = samplesIn[ 2 * n ];
+ buf[ n+delay ] = samplesIn[ 2 * n ];
}
- silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));
+ silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
/* Making sure to start both resamplers from the same state when switching from mono to stereo */
if(psEnc->nPrevChannelsInternal == 1 && id==0) {
silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.delayBuf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf, MAX_ENCODER_DELAY*sizeof(opus_int16));
}
- silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
+ silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
for( n = 0; n < nSamplesFromInput; n++ ) {
- buf[ n+delay ] = samplesIn[ 2 * n + 1 ];
+ buf[ n + delay ] = samplesIn[ 2 * n + 1 ];
}
- silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));
+ silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
- silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
+ silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
} else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
/* Combine left and right channels before resampling */
for( n = 0; n < nSamplesFromInput; n++ ) {
- buf[ n+delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ], 1 );
+ buf[ n + delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ], 1 );
}
if(psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded==0) {
for ( n = 0; n<MAX_ENCODER_DELAY; n++ )
psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ n ] = silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ n ]+(opus_int32)psEnc->state_Fxx[ 1 ].sCmn.delayBuf[ n ], 1);
}
- silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));
+ silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
/* On the first mono frame, average the results for the two resampler states */
silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ]
+ psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
}
-
}
- silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
+ silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
} else {
silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
- silk_memcpy(buf+delay, samplesIn, nSamplesFromInput*sizeof(opus_int16));
- silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));
+ silk_memcpy(buf + delay, samplesIn, nSamplesFromInput*sizeof(opus_int16));
+ silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
- silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
+ silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
}
for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
for( n = 0; n < encControl->nChannelsInternal; n++ ) {
if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) {
+ opus_int condCoding;
+
if( encControl->nChannelsInternal == 2 && n == 0 ) {
silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] );
/* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */
silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] );
}
}
- silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1 );
+ /* Use conditional coding if previous frame available */
+ if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) {
+ condCoding = CODE_CONDITIONALLY;
+ } else {
+ condCoding = CODE_INDEPENDENTLY;
+ }
+ silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding );
silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType,
psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length );
}
psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
+ if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
+ /* Reset side channel encoder memory for first frame with side coding */
+ if( psEnc->prev_decode_only_middle == 1 ) {
+ silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
+ silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
+ psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100;
+ psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100;
+ psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10;
+ psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
+ psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_inv_gain_Q16 = 65536;
+ psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1;
+ }
+ silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );
+ } else {
+ psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
+ }
if( !prefillFlag ) {
silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
- silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
+ if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
+ silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
+ }
}
} else {
/* Buffering */
silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
}
+ silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );
/* Encode */
for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+ opus_int maxBits, useCBR;
+
+ /* Handling rate constraints */
+ maxBits = encControl->maxBits;
+ if( tot_blocks == 2 && curr_block == 0 ) {
+ maxBits = maxBits * 3 / 5;
+ } else if( tot_blocks == 3 ) {
+ if( curr_block == 0 ) {
+ maxBits = maxBits * 2 / 5;
+ } else if( curr_block == 1 ) {
+ maxBits = maxBits * 3 / 4;
+ }
+ }
+ useCBR = encControl->useCBR && curr_block == tot_blocks - 1;
+
if( encControl->nChannelsInternal == 1 ) {
channelRate_bps = TargetRate_bps;
} else {
channelRate_bps = MStargetRates_bps[ n ];
+ if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) {
+ useCBR = 0;
+ /* Give mid up to 1/2 of the max bits for that frame */
+ maxBits -= encControl->maxBits / ( tot_blocks * 2 );
+ }
}
if( channelRate_bps > 0 ) {
+ opus_int condCoding;
+
silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps );
- if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc ) ) != 0 ) {
+ /* Use independent coding if no previous frame available */
+ if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) {
+ condCoding = CODE_INDEPENDENTLY;
+ } else if( n > 0 && psEnc->prev_decode_only_middle ) {
+ /* If we skipped a side frame in this packet, we don't
+ need LTP scaling; the LTP state is well-defined. */
+ condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
+ } else {
+ condCoding = CODE_CONDITIONALLY;
+ }
+ if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) {
silk_assert( 0 );
}
- psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++;
}
psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;
+ psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++;
}
+ psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ];
/* Insert VAD and FEC flags at beginning of bitstream */
if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) {
} else {
break;
}
+ curr_block++;
}
+
psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;
encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;
encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0;
encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
- encControl->stereoWidth_Q14 = psEnc->sStereo.effective_width_prev_Q14;
+ encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14;
if( prefillFlag ) {
encControl->payloadSize_ms = tmp_payloadSize_ms;
encControl->complexity = tmp_complexity;