1 /* Copyright (c) 2010 Xiph.Org Foundation, Skype Limited
2 Written by Jean-Marc Valin and Koen Vos */
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
19 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 #include "opus_decoder.h"
/* Largest frame size, in bytes, that opus_decode() accepts: it compares the
   size of the last frame against this value and reports OPUS_CORRUPTED_DATA
   when the frame is larger. */
42 #define MAX_PACKET (1275)
44 /* Make sure everything's aligned to 4 bytes (this may need to be increased
45 on really weird architectures) */
/* Applied in this file to sizeof(OpusDecoder) and to the SILK decoder size,
   so that the offsets of the SILK and CELT states inside the decoder
   allocation are built from rounded sizes. */
46 static inline int align(int i)
/* Returns the number of bytes needed for one decoder state handling
   `channels` channels: the aligned OpusDecoder structure, followed by the
   aligned SILK decoder state and the CELT decoder state. The same layout is
   used by opus_decoder_init() when it computes the sub-decoder offsets. */
51 int opus_decoder_get_size(int channels)
53 int silkDecSizeBytes, celtDecSizeBytes;
/* SILK reports its state size through an out-parameter. */
55 ret = silk_Get_Decoder_Size( &silkDecSizeBytes );
58 silkDecSizeBytes = align(silkDecSizeBytes);
/* The CELT state size depends on the channel count and is added as is. */
59 celtDecSizeBytes = celt_decoder_get_size(channels);
60 return align(sizeof(OpusDecoder))+silkDecSizeBytes+celtDecSizeBytes;
/* Initializes a decoder in the caller-provided memory `st` for sampling rate
   `Fs` and `channels` channels. The memory must hold at least
   opus_decoder_get_size(channels) bytes, because that many bytes are zeroed
   here. The SILK and CELT decoder states are placed inside the same memory
   block, after the OpusDecoder structure, and their byte offsets are
   recorded in st->silk_dec_offset and st->celt_dec_offset.
   Only channel counts of 1 or 2 pass the range check below. */
64 OpusDecoder *opus_decoder_init(OpusDecoder *st, int Fs, int channels)
67 CELTDecoder *celt_dec;
68 int ret, silkDecSizeBytes;
70 if (channels<1 || channels > 2)
72 memset(st, 0, opus_decoder_get_size(channels));
73 /* Initialize SILK decoder */
74 ret = silk_Get_Decoder_Size( &silkDecSizeBytes );
78 silkDecSizeBytes = align(silkDecSizeBytes);
/* Layout: [OpusDecoder (aligned)] [SILK state (aligned)] [CELT state]. */
79 st->silk_dec_offset = align(sizeof(OpusDecoder));
80 st->celt_dec_offset = st->silk_dec_offset+silkDecSizeBytes;
81 silk_dec = (char*)st+st->silk_dec_offset;
82 celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
/* The stream channel count starts equal to the output channel count;
   opus_decode() overwrites stream_channels from each packet. */
83 st->stream_channels = st->channels = channels;
88 ret = silk_InitDecoder( silk_dec );
93 /* Initialize CELT decoder */
94 celt_decoder_init(celt_dec, Fs, channels, &ret);
/* NOTE(review): presumably switches off CELT's own in-band signalling,
   since mode information is taken from the Opus packet header here;
   confirm against the CELT ctl documentation. */
97 celt_decoder_ctl(celt_dec, CELT_SET_SIGNALLING(0));
/* Allocates a decoder state of opus_decoder_get_size(channels) bytes with
   malloc() and hands it to opus_decoder_init(), whose result is returned
   directly.
   NOTE(review): the size is computed from `channels` before the channel
   count is validated (validation happens in opus_decoder_init()). If
   opus_decoder_init() reports failure after the allocation succeeded, the
   buffer is not released on this path; confirm whether that can happen and
   free raw_state in that case. */
106 OpusDecoder *opus_decoder_create(int Fs, int channels)
108 char *raw_state = (char*)malloc(opus_decoder_get_size(channels));
109 if (raw_state == NULL)
111 return opus_decoder_init((OpusDecoder*)raw_state, Fs, channels);
/* Blends two interleaved signals over `overlap` samples per channel and
   writes the result to `out` (which the callers in this file alias with one
   of the inputs). For each sample the weight w is the window value
   multiplied by itself, and the output is
   (w*in2 + (Q15ONE-w)*in1) >> 15, going by the fixed-point macro names.
   Samples are addressed as [i*channels+c], i.e. interleaved by channel.
   NOTE(review): the window is read with stride `inc`, whose computation is
   not among the visible lines; presumably it is derived from Fs. */
114 static void smooth_fade(const opus_int16 *in1, const opus_int16 *in2, opus_int16 *out,
115 int overlap, int channels, const opus_val16 *window, int Fs)
119 for (c=0;c<channels;c++)
121 for (i=0;i<overlap;i++)
/* Squared window value used as the blend weight for in2. */
123 opus_val16 w = MULT16_16_Q15(window[i*inc], window[i*inc]);
124 out[i*channels+c] = SHR32(MAC16_16(MULT16_16(w,in2[i*channels+c]),
125 Q15ONE-w, in1[i*channels+c]), 15);
/* Derives the coding mode of a packet from its first byte. The visible
   assignments yield MODE_CELT_ONLY in the first case and MODE_SILK_ONLY in
   the last one; the middle case is selected when both bits of the 0x60 mask
   are set in data[0].
   NOTE(review): the first condition and the value assigned in the middle
   case are not among the visible lines (presumably MODE_HYBRID). */
130 static int opus_packet_get_mode(const unsigned char *data)
135 mode = MODE_CELT_ONLY;
136 } else if ((data[0]&0x60) == 0x60)
141 mode = MODE_SILK_ONLY;
/* Decodes one frame of `len` bytes at `data` into `pcm`, which holds
   st->channels interleaved channels, or conceals a frame when `data` is
   NULL. Depending on the mode, the SILK decoder, the CELT decoder, or both
   are run; the CELT output is added onto what is already in pcm. The
   function also decodes the optional 5 ms redundant CELT frame and applies
   the cross-fades used when the mode changes between frames.
   It calls itself with NULL data to produce concealment audio in
   pcm_transition for those cross-fades.
   Returns the CELT error code when celt_ret is negative,
   OPUS_CORRUPTED_DATA on the visible failure paths, and audiosize
   otherwise. Updates st->rangeFinal, st->prev_mode and
   st->prev_redundancy before returning on the normal path. */
146 static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
147 int len, opus_int16 *pcm, int frame_size, int decode_fec)
150 CELTDecoder *celt_dec;
151 int i, silk_ret=0, celt_ret=0;
153 silk_DecControlStruct DecControl;
154 opus_int32 silk_frame_size;
155 opus_int16 pcm_celt[960*2];
156 opus_int16 pcm_transition[480*2];
163 int redundancy_bytes = 0;
165 opus_int16 redundant_audio[240*2];
167 int F2_5, F5, F10, F20;
168 const opus_val16 *window;
/* The SILK and CELT states live in the same memory block as st, at the
   byte offsets stored in st. */
170 silk_dec = (char*)st+st->silk_dec_offset;
171 celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
176 /* Payloads of 1 (2 including ToC) or 0 trigger the PLC/DTX */
180 /* In that case, don't conceal more than what the ToC says */
181 frame_size = IMIN(frame_size, st->frame_size);
185 audiosize = st->frame_size;
187 ec_dec_init(&dec,(unsigned char*)data,len);
189 audiosize = frame_size;
190 if (st->prev_mode == 0)
192 /* If we haven't got any packet yet, all we can do is return zeros */
193 for (i=0;i<audiosize;i++)
197 mode = st->prev_mode;
/* Mode-transition test: there is real data, the previous frame carried no
   redundancy, the mode differs from a valid previous mode, and the change
   is not merely between SILK-only and hybrid. */
201 if (data!=NULL && !st->prev_redundancy && mode != st->prev_mode && st->prev_mode > 0
202 && !(mode == MODE_SILK_ONLY && st->prev_mode == MODE_HYBRID)
203 && !(mode == MODE_HYBRID && st->prev_mode == MODE_SILK_ONLY))
/* When the new mode is CELT-only, run a concealment pass (NULL data) right
   away to obtain up to F10 samples in pcm_transition for the cross-fade
   done near the end of this function. */
206 if (mode == MODE_CELT_ONLY)
207 opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F10, audiosize), 0);
209 if (audiosize > frame_size)
211 fprintf(stderr, "PCM buffer too small: %d vs %d (mode = %d)\n", audiosize, frame_size, mode);
214 frame_size = audiosize;
217 /* SILK processing */
218 if (mode != MODE_CELT_ONLY)
220 int lost_flag, decoded_samples;
221 opus_int16 *pcm_ptr = pcm;
/* Coming from a CELT-only frame, the SILK state is re-initialized. */
223 if (st->prev_mode==MODE_CELT_ONLY)
224 silk_InitDecoder( silk_dec );
226 DecControl.API_sampleRate = st->Fs;
227 DecControl.nChannelsAPI = st->channels;
228 DecControl.nChannelsInternal = st->stream_channels;
/* Frame duration in milliseconds. */
229 DecControl.payloadSize_ms = 1000 * audiosize / st->Fs;
/* SILK internal rate: chosen from the bandwidth in SILK-only mode, fixed
   at 16000 in the remaining visible assignments. */
230 if( mode == MODE_SILK_ONLY ) {
231 if( st->bandwidth == OPUS_BANDWIDTH_NARROWBAND ) {
232 DecControl.internalSampleRate = 8000;
233 } else if( st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND ) {
234 DecControl.internalSampleRate = 12000;
235 } else if( st->bandwidth == OPUS_BANDWIDTH_WIDEBAND ) {
236 DecControl.internalSampleRate = 16000;
238 DecControl.internalSampleRate = 16000;
243 DecControl.internalSampleRate = 16000;
/* lost_flag is 1 when there is no data to decode; otherwise it is
   2 * decode_fec, i.e. 0 when FEC decoding is not requested. */
246 lost_flag = data == NULL ? 1 : 2 * decode_fec;
249 /* Call SILK decoder */
/* first_frame is set only while nothing has been decoded yet in this loop. */
250 int first_frame = decoded_samples == 0;
251 silk_ret = silk_Decode( silk_dec, &DecControl,
252 lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size );
255 /* PLC failure should not be fatal */
256 silk_frame_size = frame_size;
257 for (i=0;i<frame_size*st->channels;i++)
260 return OPUS_CORRUPTED_DATA;
/* Advance past the samples SILK just produced (interleaved by channel). */
262 pcm_ptr += silk_frame_size * st->channels;
263 decoded_samples += silk_frame_size;
264 } while( decoded_samples < frame_size );
266 for (i=0;i<frame_size*st->channels;i++)
271 if (mode != MODE_CELT_ONLY && data != NULL)
273 /* Check if we have a redundant 0-8 kHz band */
274 redundancy = ec_dec_bit_logp(&dec, 12);
277 celt_to_silk = ec_dec_bit_logp(&dec, 1);
/* In hybrid mode the redundant frame size is coded explicitly (2..257). */
278 if (mode == MODE_HYBRID)
279 redundancy_bytes = 2 + ec_dec_uint(&dec, 256);
/* Otherwise the redundant frame takes the bytes left after the data read
   so far, rounded up to whole bytes (ec_tell presumably counts bits). */
281 redundancy_bytes = len - ((ec_tell(&dec)+7)>>3);
282 /* Can only happen on an invalid packet */
283 if (redundancy_bytes<0)
285 redundancy_bytes = 0;
/* From here on len covers only the main payload; the redundant frame
   starts at data+len. */
289 len -= redundancy_bytes;
291 return OPUS_CORRUPTED_DATA;
292 /* Shrink decoder because of raw bits */
293 dec.storage -= redundancy_bytes;
296 if (mode != MODE_CELT_ONLY)
/* The CELT end band is selected from the packet bandwidth. */
302 switch(st->bandwidth)
304 case OPUS_BANDWIDTH_NARROWBAND:
307 case OPUS_BANDWIDTH_MEDIUMBAND:
308 case OPUS_BANDWIDTH_WIDEBAND:
311 case OPUS_BANDWIDTH_SUPERWIDEBAND:
314 case OPUS_BANDWIDTH_FULLBAND:
318 celt_decoder_ctl(celt_dec, CELT_SET_END_BAND(endband));
319 celt_decoder_ctl(celt_dec, CELT_SET_CHANNELS(st->stream_channels));
/* For a transition into a non-CELT-only mode the concealment pass is run
   here instead of at the top of the function. */
325 if (transition && mode != MODE_CELT_ONLY)
326 opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F10, audiosize), 0);
328 /* 5 ms redundant frame for CELT->SILK*/
329 if (redundancy && celt_to_silk)
331 celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
332 celt_decode(celt_dec, data+len, redundancy_bytes, redundant_audio, F5);
333 celt_decoder_ctl(celt_dec, CELT_RESET_STATE);
336 /* MUST be after PLC */
337 celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(start_band));
340 celt_decoder_ctl(celt_dec, CELT_RESET_STATE);
342 if (mode != MODE_SILK_ONLY)
/* CELT decodes at most F20 samples per call here. */
344 int celt_frame_size = IMIN(F20, frame_size);
/* With decode_fec set, CELT is given no data. */
346 celt_ret = celt_decode_with_ec(celt_dec, decode_fec?NULL:data, len, pcm_celt, celt_frame_size, &dec);
/* Add the CELT output onto what is already in pcm, through SAT16. */
347 for (i=0;i<celt_frame_size*st->channels;i++)
348 pcm[i] = SAT16(pcm[i] + (int)pcm_celt[i]);
/* The cross-fades below use the window of the CELT mode. */
352 const CELTMode *celt_mode;
353 celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode));
354 window = celt_mode->window;
357 /* 5 ms redundant frame for SILK->CELT */
358 if (redundancy && !celt_to_silk)
360 celt_decoder_ctl(celt_dec, CELT_RESET_STATE);
361 celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
363 celt_decode(celt_dec, data+len, redundancy_bytes, redundant_audio, F5);
/* Blend the last F2_5 samples of the frame, in place, with the second
   F2_5 samples of the redundant audio. */
364 smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5,
365 pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs);
/* CELT->SILK: the start of the frame is copied from the redundant audio
   and the following F2_5 samples are blended with it. */
367 if (redundancy && celt_to_silk)
369 for (c=0;c<st->channels;c++)
372 pcm[st->channels*i+c] = redundant_audio[st->channels*i+c];
374 smooth_fade(redundant_audio+st->channels*F2_5, pcm+st->channels*F2_5,
375 pcm+st->channels*F2_5, F2_5, st->channels, window, st->Fs);
/* Transition handling: the first F2_5 samples are replaced by the
   concealment output and the next F2_5 samples are blended with it.
   NOTE(review): the guarding condition is not among the visible lines;
   presumably the transition flag. */
379 for (i=0;i<st->channels*F2_5;i++)
380 pcm[i] = pcm_transition[i];
382 smooth_fade(pcm_transition+st->channels*F2_5, pcm+st->channels*F2_5,
383 pcm+st->channels*F2_5, F2_5,
384 st->channels, window, st->Fs);
/* Keep the final range coder value and this frame's mode and redundancy
   flag for later calls. */
387 st->rangeFinal = dec.rng;
389 st->prev_mode = mode;
390 st->prev_redundancy = redundancy;
391 return celt_ret<0 ? celt_ret : audiosize;
/* Reads a frame length from the start of `data` and stores it in *size.
   A first byte below 252 selects the short form; in the long form the
   length is 4*data[1] + data[0].
   NOTE(review): opus_decode() subtracts the return value from the remaining
   length together with the frame size, so it is presumably the number of
   bytes occupied by the length field; the error path is not among the
   visible lines, but callers test for *size < 0. */
395 static int parse_size(const unsigned char *data, int len, short *size)
401 } else if (data[0]<252)
410 *size = 4*data[1] + data[0];
/* Decodes one Opus packet of `len` bytes into `pcm`, which has room for
   `frame_size` samples per channel.
   With no data (len == 0 or data == NULL) it asks opus_decode_frame() to
   conceal frame_size samples. Otherwise it stores the mode, bandwidth,
   samples per frame and stream channel count taken from the first byte in
   the decoder state, works out the size of every frame in the packet, and
   decodes the frames one after another with opus_decode_frame(), moving
   pcm forward by the returned sample count times st->channels after each.
   Returns OPUS_CORRUPTED_DATA on the visible validation failures: sizes
   that do not fit in the packet, a frame count that is not positive, a
   total duration above 120 ms, or a last frame larger than MAX_PACKET.
   NOTE(review): the comment beginning "Because it's not encoded explicitly"
   near the end of this function has no terminator among the visible lines;
   check that it is closed in the file, otherwise the code after it is
   commented out. */
415 int opus_decode(OpusDecoder *st, const unsigned char *data,
416 int len, opus_int16 *pcm, int frame_size, int decode_fec)
418 int i, bytes, nb_samples;
420 unsigned char ch, toc;
421 /* 48 x 2.5 ms = 120 ms */
/* No packet: conceal, with FEC decoding off. */
423 if (len==0 || data==NULL)
424 return opus_decode_frame(st, NULL, 0, pcm, frame_size, 0);
/* Cache the properties encoded in the first byte of the packet. */
427 st->mode = opus_packet_get_mode(data);
428 st->bandwidth = opus_packet_get_bandwidth(data);
429 st->frame_size = opus_packet_get_samples_per_frame(data, st->Fs);
430 st->stream_channels = opus_packet_get_nb_channels(data);
444 return OPUS_CORRUPTED_DATA;
/* Two frames of equal size share the payload evenly. */
445 size[0] = size[1] = len/2;
/* Two frames of different size: the first size is coded explicitly and the
   second frame takes the rest. */
450 bytes = parse_size(data, len, size);
452 if (size[0]<0 || size[0] > len)
453 return OPUS_CORRUPTED_DATA;
455 size[1] = len-size[0];
457 /* Multiple CBR/VBR frames (from 0 to 120 ms) */
460 return OPUS_CORRUPTED_DATA;
461 /* Number of frames encoded in bits 0 to 5 */
/* Reject a non-positive count or a total duration above 3/25 s (120 ms). */
464 if (count <= 0 || st->frame_size*count*25 > 3*st->Fs)
465 return OPUS_CORRUPTED_DATA;
467 /* Padding flag is bit 6 */
474 return OPUS_CORRUPTED_DATA;
/* A padding length byte of 255 adds 254 to the total; any other value
   adds itself. */
477 padding += p==255 ? 254: p;
482 return OPUS_CORRUPTED_DATA;
483 /* VBR flag is bit 7 */
/* The first count-1 sizes are coded explicitly; the last frame gets
   whatever remains in last_size. */
488 for (i=0;i<count-1;i++)
490 bytes = parse_size(data, len, size+i);
492 if (size[i]<0 || size[i] > len)
493 return OPUS_CORRUPTED_DATA;
495 last_size -= bytes+size[i];
498 return OPUS_CORRUPTED_DATA;
499 size[count-1]=last_size;
504 return OPUS_CORRUPTED_DATA;
505 for (i=0;i<count;i++)
510 /* Because it's not encoded explicitly, it's possible the size of the
511 last packet (or all the packets, for the CBR case) is larger than
514 if (size[count-1] > MAX_PACKET)
515 return OPUS_CORRUPTED_DATA;
516 if (count*st->frame_size > frame_size)
519 for (i=0;i<count;i++)
522 ret = opus_decode_frame(st, data, size[i], pcm, frame_size-nb_samples, decode_fec);
526 pcm += ret*st->channels;
/* Variadic control interface for the decoder. The visible requests are:
   - OPUS_GET_MODE_REQUEST: takes an int pointer and stores st->prev_mode,
     i.e. the mode recorded by the last decoded frame, not st->mode;
   - OPUS_SET_BANDWIDTH_REQUEST: takes an int and stores it in
     st->bandwidth (opus_decode() overwrites that field from each packet);
   - OPUS_GET_BANDWIDTH_REQUEST: takes an int pointer and stores
     st->bandwidth.
   Any other request is reported on stderr. */
531 int opus_decoder_ctl(OpusDecoder *st, int request, ...)
535 va_start(ap, request);
539 case OPUS_GET_MODE_REQUEST:
541 int *value = va_arg(ap, int*);
542 *value = st->prev_mode;
545 case OPUS_SET_BANDWIDTH_REQUEST:
547 int value = va_arg(ap, int);
548 st->bandwidth = value;
551 case OPUS_GET_BANDWIDTH_REQUEST:
553 int *value = va_arg(ap, int*);
554 *value = st->bandwidth;
/* NOTE(review): the message has no trailing newline. */
558 fprintf(stderr, "unknown opus_decoder_ctl() request: %d", request);
/* Counterpart of opus_decoder_create(), which obtains the state with
   malloc(). NOTE(review): presumably releases that memory; confirm against
   the function body. */
566 void opus_decoder_destroy(OpusDecoder *st)
/* Returns st->rangeFinal, the range coder value (dec.rng) that
   opus_decode_frame() saved when it last completed a frame. */
571 int opus_decoder_get_final_range(OpusDecoder *st)
573 return st->rangeFinal;
/* Derives the audio bandwidth of a packet from its first byte.
   - First case: OPUS_BANDWIDTH_MEDIUMBAND plus the 2-bit field in bits 5-6,
     with a MEDIUMBAND result replaced by NARROWBAND.
   - Second case, when both bits of the 0x60 mask are set: bit 0x10 selects
     FULLBAND, otherwise SUPERWIDEBAND.
   - Last case: OPUS_BANDWIDTH_NARROWBAND plus the 2-bit field in bits 5-6.
   NOTE(review): the condition of the first case is not among the visible
   lines; presumably it is the CELT-only test, mirroring
   opus_packet_get_mode(). */
576 int opus_packet_get_bandwidth(const unsigned char *data)
581 bandwidth = OPUS_BANDWIDTH_MEDIUMBAND + ((data[0]>>5)&0x3);
582 if (bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
583 bandwidth = OPUS_BANDWIDTH_NARROWBAND;
584 } else if ((data[0]&0x60) == 0x60)
586 bandwidth = (data[0]&0x10) ? OPUS_BANDWIDTH_FULLBAND : OPUS_BANDWIDTH_SUPERWIDEBAND;
589 bandwidth = OPUS_BANDWIDTH_NARROWBAND + ((data[0]>>5)&0x3);
/* Computes the number of samples per frame at sampling rate `Fs` from the
   first byte of a packet.
   - First case: (Fs << n) / 400 with n the 2-bit field in bits 3-4, i.e.
     2.5 ms doubled n times.
   - Second case, when both bits of the 0x60 mask are set: bit 0x08 selects
     Fs/50 (20 ms), otherwise Fs/100 (10 ms).
   - Last case: with n the 2-bit field in bits 3-4, either Fs*60/1000
     (60 ms) or (Fs << n) / 100, i.e. 10 ms doubled n times.
   NOTE(review): the condition of the first case and the test that selects
   the 60 ms value are not among the visible lines. */
594 int opus_packet_get_samples_per_frame(const unsigned char *data, int Fs)
599 audiosize = ((data[0]>>3)&0x3);
600 audiosize = (Fs<<audiosize)/400;
601 } else if ((data[0]&0x60) == 0x60)
603 audiosize = (data[0]&0x08) ? Fs/50 : Fs/100;
606 audiosize = ((data[0]>>3)&0x3);
608 audiosize = Fs*60/1000;
610 audiosize = (Fs<<audiosize)/100;
/* Returns the channel count coded in the first byte of a packet:
   2 when bit 0x4 is set, 1 otherwise. */
615 int opus_packet_get_nb_channels(const unsigned char *data)
617 return (data[0]&0x4) ? 2 : 1;
/* Returns the number of frames in a packet. The two low bits of the first
   byte give the frame-count code; on the path that reads the second byte,
   the count is its low six bits. OPUS_CORRUPTED_DATA is returned on the
   visible error path.
   NOTE(review): the conditions selecting each path, and the use of `len`,
   are not among the visible lines. */
620 int opus_packet_get_nb_frames(const unsigned char packet[], int len)
625 count = packet[0]&0x3;
631 return OPUS_CORRUPTED_DATA;
633 return packet[1]&0x3F;
/* Computes the number of samples per channel in a packet as the frame
   count times the samples per frame at the decoder's sampling rate, and
   returns OPUS_CORRUPTED_DATA when that exceeds 3/25 s (120 ms).
   NOTE(review): opus_packet_get_nb_frames() can return the negative
   OPUS_CORRUPTED_DATA code; no check of `count` appears in the visible
   lines before it is multiplied, so confirm that the error is not turned
   into a meaningless negative sample count. */
636 int opus_decoder_get_nb_samples(const OpusDecoder *dec, const unsigned char packet[], int len)
639 int count = opus_packet_get_nb_frames(packet, len);
640 samples = count*opus_packet_get_samples_per_frame(packet, dec->Fs);
641 /* Can't have more than 120 ms */
642 if (samples*25 > dec->Fs*3)
643 return OPUS_CORRUPTED_DATA;