1 /* Copyright (c) 2010 Xiph.Org Foundation, Skype Limited
2 Written by Jean-Marc Valin and Koen Vos */
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
19 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 #include "stack_alloc.h"
42 #include "float_cast.h"
/* celt_decode_native names the CELT entry point this file calls when it
   decodes the 5 ms redundant frames (see the celt_decode_native() calls in
   opus_decode_frame()).
   NOTE(review): two definitions of the same macro appear back to back; they
   are presumably alternatives selected by a preprocessor conditional that is
   not visible in this listing -- confirm. */
45 #define celt_decode_native celt_decode
47 #define celt_decode_native celt_decode_float
57 /* Sampling rate (at the API level) */
68 static inline opus_int16 SAT16(opus_int32 x) {
69 return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
/* align(): helper applied to byte sizes before they are summed into the
   decoder state layout (see opus_decoder_get_size() and opus_decoder_init()).
   NOTE(review): the function body is not visible in this listing, so the
   4-byte rule stated in the comment below could not be checked against the
   code -- confirm. */
73 /* Make sure everything's aligned to 4 bytes (this may need to be increased
74 on really weird architectures) */
75 static inline int align(int i)
/* Returns the number of bytes needed for a complete decoder state for
   `channels` channels: align(sizeof(OpusDecoder)) plus the align()-ed SILK
   decoder size plus the CELT decoder size for `channels`.
   NOTE(review): the declaration of `ret` and any check on the result of
   silk_Get_Decoder_Size() are not visible here -- confirm how a SILK failure
   is reported to the caller. */
80 int opus_decoder_get_size(int channels)
82 int silkDecSizeBytes, celtDecSizeBytes;
/* Ask SILK for the size of its decoder state. */
84 ret = silk_Get_Decoder_Size( &silkDecSizeBytes );
/* The SILK size is passed through align() because the CELT state is placed
   directly after it (see the offsets computed in opus_decoder_init()). */
87 silkDecSizeBytes = align(silkDecSizeBytes);
88 celtDecSizeBytes = celt_decoder_get_size(channels);
89 return align(sizeof(OpusDecoder))+silkDecSizeBytes+celtDecSizeBytes;
/* Initializes a decoder state in caller-provided memory `st`.
   st        buffer of at least opus_decoder_get_size(channels) bytes (that
             many bytes are zeroed below).
   Fs        sampling rate handed to the CELT decoder and used to derive the
             initial st->frame_size.
   channels  channel count; the visible check accepts only 1 or 2.
   The buffer is laid out as: OpusDecoder header, SILK decoder state at
   st->silk_dec_offset, CELT decoder state at st->celt_dec_offset.
   NOTE(review): the statements taken when `channels` is out of range or when
   `ret` signals an error, and the final return, are not visible in this
   listing -- confirm the failure behaviour. */
93 OpusDecoder *opus_decoder_init(OpusDecoder *st, int Fs, int channels)
96 CELTDecoder *celt_dec;
97 int ret, silkDecSizeBytes;
99 if (channels<1 || channels > 2)
/* Clear the whole state (header plus both sub-decoders). */
101 memset(st, 0, opus_decoder_get_size(channels));
102 /* Initialize SILK decoder */
103 ret = silk_Get_Decoder_Size( &silkDecSizeBytes );
107 silkDecSizeBytes = align(silkDecSizeBytes);
/* SILK state starts right after the aligned header, CELT state right after
   the aligned SILK state. */
108 st->silk_dec_offset = align(sizeof(OpusDecoder));
109 st->celt_dec_offset = st->silk_dec_offset+silkDecSizeBytes;
110 silk_dec = (char*)st+st->silk_dec_offset;
111 celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
/* Until a packet says otherwise, the stream channel count equals the output
   channel count. */
112 st->stream_channels = st->channels = channels;
117 ret = silk_InitDecoder( silk_dec );
122 /* Initialize CELT decoder */
123 celt_decoder_init(celt_dec, Fs, channels, &ret);
/* Turn CELT's own signalling off (CELT_SET_SIGNALLING(0)). */
126 celt_decoder_ctl(celt_dec, CELT_SET_SIGNALLING(0));
/* Default frame size of Fs/400 samples, i.e. 2.5 ms at Fs. */
129 st->frame_size = Fs/400;
136 OpusDecoder *opus_decoder_create(int Fs, int channels)
138 char *raw_state = (char*)malloc(opus_decoder_get_size(channels));
139 if (raw_state == NULL)
141 return opus_decoder_init((OpusDecoder*)raw_state, Fs, channels);
/* Cross-fades two interleaved signals into `out`.
   For every channel c and every i in [0, overlap) the output sample is
   (w*in2 + (Q15ONE-w)*in1) >> 15, where w is the square of window[i*inc]
   computed in Q15.
   in1, in2  interleaved inputs (sample i of channel c is at i*channels+c).
   out       interleaved output; callers in this file pass a buffer that
             aliases one of the inputs.
   overlap   number of samples per channel to blend.
   window    window table, read with stride `inc`.
   NOTE(review): the declarations of i, c and inc, and how inc is derived
   (presumably from Fs), are not visible in this listing -- confirm. */
144 static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2, opus_val16 *out,
145 int overlap, int channels, const opus_val16 *window, int Fs)
149 for (c=0;c<channels;c++)
151 for (i=0;i<overlap;i++)
/* Squared window value is the blend weight given to in2. */
153 opus_val16 w = MULT16_16_Q15(window[i*inc], window[i*inc]);
154 out[i*channels+c] = SHR32(MAC16_16(MULT16_16(w,in2[i*channels+c]),
155 Q15ONE-w, in1[i*channels+c]), 15);
/* Derives the coding mode from the first byte of a packet (data[0]).
   The visible assignments yield MODE_CELT_ONLY or MODE_SILK_ONLY; a middle
   branch is taken when bits 0x60 of data[0] are both set.
   NOTE(review): the first condition, the body of the 0x60 branch
   (presumably MODE_HYBRID) and the return statement are not visible in this
   listing -- confirm. */
160 static int opus_packet_get_mode(const unsigned char *data)
165 mode = MODE_CELT_ONLY;
166 } else if ((data[0]&0x60) == 0x60)
171 mode = MODE_SILK_ONLY;
/* Decodes one Opus frame into `pcm`, or produces concealment audio when
   `data` is NULL.
   st          decoder state; the SILK and CELT sub-decoders are located
               through st->silk_dec_offset and st->celt_dec_offset.
   data, len   frame payload and its length in bytes (data may be NULL).
   pcm         interleaved output (sample i of channel c at st->channels*i+c).
   frame_size  room available in `pcm`, in samples per channel.
   decode_fec  lost_flag for SILK is 2*decode_fec when data is present, and
               the CELT call is given a NULL payload when it is non-zero.
   Returns audiosize when celt_ret is not negative, otherwise celt_ret; also
   returns OPUS_BUFFER_TOO_SMALL when frame_size < F2_5 and
   OPUS_CORRUPTED_DATA on the error paths visible below.
   The function calls itself with data == NULL to synthesize up to F5 samples
   of transition audio when the coding mode changes.
   NOTE(review): a number of declarations, conditions, braces and statements
   of this function are not visible in this listing; the comments below
   describe only the statements that are shown. */
176 static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
177 int len, opus_val16 *pcm, int frame_size, int decode_fec)
180 CELTDecoder *celt_dec;
181 int i, silk_ret=0, celt_ret=0;
183 silk_DecControlStruct DecControl;
184 opus_int32 silk_frame_size;
185 VARDECL(opus_int16, pcm_silk);
186 VARDECL(opus_val16, pcm_transition);
187 VARDECL(opus_val16, redundant_audio);
194 int redundancy_bytes = 0;
/* NOTE(review): F2_5/F5/F10/F20 are presumably the sample counts of
   2.5/5/10/20 ms at st->Fs; their assignments are not visible -- confirm. */
197 int F2_5, F5, F10, F20;
198 const opus_val16 *window;
/* Locate the two sub-decoder states inside the single state buffer. */
201 silk_dec = (char*)st+st->silk_dec_offset;
202 celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
/* The output buffer must hold at least F2_5 samples per channel. */
207 if (frame_size < F2_5)
208 return OPUS_BUFFER_TOO_SMALL;
209 /* Payloads of 1 (2 including ToC) or 0 trigger the PLC/DTX */
213 /* In that case, don't conceal more than what the ToC says */
214 /* FIXME: What if st->frame_size has never been set? */
/* (opus_decoder_init() does assign st->frame_size = Fs/400.) */
215 frame_size = IMIN(frame_size, st->frame_size);
219 audiosize = st->frame_size;
/* Start the range decoder on the frame payload. */
221 ec_dec_init(&dec,(unsigned char*)data,len);
223 audiosize = frame_size;
225 if (st->prev_mode == 0)
227 /* If we haven't got any packet yet, all we can do is return zeros */
228 for (i=0;i<audiosize*st->channels;i++)
/* Reuse the mode of the previous frame. */
233 mode = st->prev_mode;
237 ALLOC(pcm_transition, F5*st->channels, opus_val16);
/* Mode-change test: a real packet, no redundancy in the previous frame, a
   known previous mode that differs from the new one, and the change is not
   between SILK-only and hybrid in either direction. */
239 if (data!=NULL && !st->prev_redundancy && mode != st->prev_mode && st->prev_mode > 0
240 && !(mode == MODE_SILK_ONLY && st->prev_mode == MODE_HYBRID)
241 && !(mode == MODE_HYBRID && st->prev_mode == MODE_SILK_ONLY))
/* When the new mode is CELT-only, synthesize the transition audio right away
   through the data == NULL path of this same function. */
244 if (mode == MODE_CELT_ONLY)
245 opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0);
/* The frame to decode does not fit in the space the caller provided. */
247 if (audiosize > frame_size)
249 fprintf(stderr, "PCM buffer too small: %d vs %d (mode = %d)\n", audiosize, frame_size, mode);
253 frame_size = audiosize;
256 ALLOC(pcm_silk, frame_size*st->channels, opus_int16);
257 ALLOC(redundant_audio, F5*st->channels, opus_val16);
259 /* SILK processing */
260 if (mode != MODE_CELT_ONLY)
262 int lost_flag, decoded_samples;
263 opus_int16 *pcm_ptr = pcm_silk;
/* SILK state is re-initialized when the previous frame was CELT-only. */
265 if (st->prev_mode==MODE_CELT_ONLY)
266 silk_InitDecoder( silk_dec );
268 DecControl.API_sampleRate = st->Fs;
269 DecControl.nChannelsAPI = st->channels;
270 DecControl.nChannelsInternal = st->stream_channels;
/* Frame duration in milliseconds. */
271 DecControl.payloadSize_ms = 1000 * audiosize / st->Fs;
/* SILK-only: internal rate follows the bandwidth (8, 12 or 16 kHz in the
   branches shown); the remaining visible assignments all use 16 kHz. */
272 if( mode == MODE_SILK_ONLY ) {
273 if( st->bandwidth == OPUS_BANDWIDTH_NARROWBAND ) {
274 DecControl.internalSampleRate = 8000;
275 } else if( st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND ) {
276 DecControl.internalSampleRate = 12000;
277 } else if( st->bandwidth == OPUS_BANDWIDTH_WIDEBAND ) {
278 DecControl.internalSampleRate = 16000;
280 DecControl.internalSampleRate = 16000;
285 DecControl.internalSampleRate = 16000;
/* 1 when there is no payload, otherwise 0 or 2 depending on decode_fec. */
288 lost_flag = data == NULL ? 1 : 2 * decode_fec;
291 /* Call SILK decoder */
/* silk_Decode() is called repeatedly until frame_size samples have been
   produced; first_frame is set only while nothing has been decoded yet. */
292 int first_frame = decoded_samples == 0;
293 silk_ret = silk_Decode( silk_dec, &DecControl,
294 lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size );
297 /* PLC failure should not be fatal */
298 silk_frame_size = frame_size;
299 for (i=0;i<frame_size*st->channels;i++)
303 return OPUS_CORRUPTED_DATA;
/* Advance the interleaved SILK output pointer past what was just decoded. */
306 pcm_ptr += silk_frame_size * st->channels;
307 decoded_samples += silk_frame_size;
308 } while( decoded_samples < frame_size );
/* Redundancy signalling is read from the range decoder only for SILK-based
   modes with a real payload. */
312 if (mode != MODE_CELT_ONLY && data != NULL)
314 /* Check if we have a redundant 0-8 kHz band */
315 redundancy = ec_dec_bit_logp(&dec, 12);
/* Direction flag of the redundant frame. */
318 celt_to_silk = ec_dec_bit_logp(&dec, 1);
/* Hybrid carries an explicit redundancy length (2 + a value below 256);
   the other visible assignment uses whatever is left of the payload.
   NOTE(review): assumes ec_tell() counts bits, so (x+7)>>3 is the number of
   bytes consumed -- confirm. */
319 if (mode == MODE_HYBRID)
320 redundancy_bytes = 2 + ec_dec_uint(&dec, 256);
322 redundancy_bytes = len - ((ec_tell(&dec)+7)>>3);
323 /* Can only happen on an invalid packet */
324 if (redundancy_bytes<0)
326 redundancy_bytes = 0;
/* The redundant frame sits at the end of the payload: data+len points at it
   after this subtraction. */
330 len -= redundancy_bytes;
333 return OPUS_CORRUPTED_DATA;
335 /* Shrink decoder because of raw bits */
336 dec.storage -= redundancy_bytes;
339 if (mode != MODE_CELT_ONLY)
/* Choose the CELT end band from the signalled bandwidth.
   NOTE(review): the endband values assigned in each case are not visible. */
345 switch(st->bandwidth)
347 case OPUS_BANDWIDTH_NARROWBAND:
350 case OPUS_BANDWIDTH_MEDIUMBAND:
351 case OPUS_BANDWIDTH_WIDEBAND:
354 case OPUS_BANDWIDTH_SUPERWIDEBAND:
357 case OPUS_BANDWIDTH_FULLBAND:
361 celt_decoder_ctl(celt_dec, CELT_SET_END_BAND(endband));
362 celt_decoder_ctl(celt_dec, CELT_SET_CHANNELS(st->stream_channels));
/* For non-CELT-only modes the transition audio is synthesized here, through
   the data == NULL path of this same function. */
368 if (transition && mode != MODE_CELT_ONLY)
369 opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0);
371 /* 5 ms redundant frame for CELT->SILK*/
372 if (redundancy && celt_to_silk)
/* Decode the redundant frame from band 0 into redundant_audio, then reset
   the CELT state. */
374 celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
375 celt_decode_native(celt_dec, data+len, redundancy_bytes, redundant_audio, F5);
376 celt_decoder_ctl(celt_dec, CELT_RESET_STATE);
379 /* MUST be after PLC */
380 celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(start_band));
383 celt_decoder_ctl(celt_dec, CELT_RESET_STATE);
385 if (mode != MODE_SILK_ONLY)
/* A single CELT call covers at most F20 samples. */
387 int celt_frame_size = IMIN(F20, frame_size);
389 celt_ret = celt_decode_with_ec(celt_dec, decode_fec?NULL:data, len, pcm, celt_frame_size, &dec);
391 for (i=0;i<frame_size*st->channels;i++)
/* Add the SILK output to what is already in pcm. Two forms are shown: a
   saturating integer sum and a float sum with SILK scaled by 1/32768.
   NOTE(review): presumably alternatives under a preprocessor conditional
   that is not visible -- confirm. */
395 if (mode != MODE_CELT_ONLY)
398 for (i=0;i<frame_size*st->channels;i++)
399 pcm[i] = SAT16(pcm[i] + pcm_silk[i]);
401 for (i=0;i<frame_size*st->channels;i++)
402 pcm[i] = pcm[i] + (1./32768.)*pcm_silk[i];
/* Fetch the CELT window used by the cross-fades below. */
407 const CELTMode *celt_mode;
408 celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode));
409 window = celt_mode->window;
412 /* 5 ms redundant frame for SILK->CELT */
413 if (redundancy && !celt_to_silk)
415 celt_decoder_ctl(celt_dec, CELT_RESET_STATE);
416 celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
/* Decode the redundant frame, then blend the last F2_5 samples of pcm with
   the second F2_5 samples of the redundant audio. */
418 celt_decode_native(celt_dec, data+len, redundancy_bytes, redundant_audio, F5);
419 smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5,
420 pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs);
/* CELT->SILK: redundant audio is copied into the start of pcm, then the
   following F2_5 samples are blended between redundant audio and pcm. */
422 if (redundancy && celt_to_silk)
424 for (c=0;c<st->channels;c++)
427 pcm[st->channels*i+c] = redundant_audio[st->channels*i+c];
429 smooth_fade(redundant_audio+st->channels*F2_5, pcm+st->channels*F2_5,
430 pcm+st->channels*F2_5, F2_5, st->channels, window, st->Fs);
/* Transition audio: the first F2_5 samples replace pcm outright, the next
   F2_5 samples are blended between the transition audio and pcm. */
434 for (i=0;i<st->channels*F2_5;i++)
435 pcm[i] = pcm_transition[i];
437 smooth_fade(pcm_transition+st->channels*F2_5, pcm+st->channels*F2_5,
438 pcm+st->channels*F2_5, F2_5,
439 st->channels, window, st->Fs);
/* Remember the final range-coder state (exposed through
   opus_decoder_get_final_range()) and the mode/redundancy of this frame. */
442 st->rangeFinal = dec.rng;
444 st->prev_mode = mode;
445 st->prev_redundancy = redundancy;
447 return celt_ret<0 ? celt_ret : audiosize;
/* Reads one frame-size field from `data` into *size.
   A first byte below 252 selects one branch; the visible assignment
   4*data[1] + data[0] uses two bytes.
   NOTE(review): the length checks, the body of the data[0]<252 branch and
   the return values are not visible in this listing; callers in this file
   treat the return value as the number of bytes the field occupied and a
   negative *size as an error -- confirm. */
451 static int parse_size(const unsigned char *data, int len, short *size)
457 } else if (data[0]<252)
466 *size = 4*data[1] + data[0];
/* Splits an Opus packet into its frames.
   data, len       packet bytes and length.
   self_delimited  when non-zero, an extra size field for the last frame is
                   read (see the "Self-delimited framing" step below).
   out_toc         presumably receives the first (TOC) byte -- the store is
                   not visible here.
   frames          presumably receives a pointer to each frame -- the stores
                   are not visible here; opus_decode() passes NULL.
   size            receives the byte size of each frame.
   payload_offset  receives data - data0, i.e. how far parsing advanced from
                   the start of the packet.
   Returns OPUS_CORRUPTED_DATA on every failed check shown below; callers in
   this file use the successful return value as the frame count.
   Checks visible near the end of the function: the implicitly sized last
   frame is rejected when last_size > 1275, otherwise it is stored in
   size[count-1]; a loop then visits all `count` frames.
   NOTE(review): the switch on the frame-count code, several conditions and
   the pointer advances are not visible in this listing. */
471 static int opus_packet_parse_impl(const unsigned char *data, int len,
472 int self_delimited, unsigned char *out_toc,
473 const unsigned char *frames[48], short size[48], int *payload_offset)
478 unsigned char ch, toc;
/* Kept to compute the payload offset at the end. */
481 const unsigned char *data0 = data;
/* Frame duration in samples at 48 kHz, used for the 5760-sample limit. */
486 framesize = opus_packet_get_samples_per_frame(data, 48000);
505 return OPUS_CORRUPTED_DATA;
/* Two equally sized frames: each gets half of the remaining bytes. */
506 size[0] = last_size = len/2;
/* Two frames with an explicit size for the first one; the second takes the
   rest. */
512 bytes = parse_size(data, len, size);
514 if (size[0]<0 || size[0] > len)
515 return OPUS_CORRUPTED_DATA;
517 last_size = len-size[0];
519 /* Multiple CBR/VBR frames (from 0 to 120 ms) */
522 return OPUS_CORRUPTED_DATA;
523 /* Number of frames encoded in bits 0 to 5 */
/* 5760 samples at 48 kHz is 120 ms. */
526 if (count <= 0 || framesize*count > 5760)
527 return OPUS_CORRUPTED_DATA;
529 /* Padding flag is bit 6 */
536 return OPUS_CORRUPTED_DATA;
/* A padding byte of 255 contributes 254 and (presumably) announces another
   padding-length byte; any other value is added as-is. */
539 padding += p==255 ? 254: p;
544 return OPUS_CORRUPTED_DATA;
545 /* VBR flag is bit 7 */
/* VBR: every frame but the last has its own size field; what remains after
   subtracting those fields and sizes belongs to the last frame. */
551 for (i=0;i<count-1;i++)
553 bytes = parse_size(data, len, size+i);
555 if (size[i]<0 || size[i] > len)
556 return OPUS_CORRUPTED_DATA;
558 last_size -= bytes+size[i];
561 return OPUS_CORRUPTED_DATA;
562 } else if (!self_delimited)
/* CBR without self-delimiting: the payload must divide evenly by count. */
565 last_size = len/count;
566 if (last_size*count!=len)
567 return OPUS_CORRUPTED_DATA;
568 for (i=0;i<count-1;i++)
573 /* Self-delimited framing has an extra size for the last frame. */
576 bytes = parse_size(data, len, size+count-1);
578 if (size[count-1]<0 || size[count-1] > len)
579 return OPUS_CORRUPTED_DATA;
581 /* For CBR packets, apply the size to all the frames. */
584 if (size[count-1]*count > len)
585 return OPUS_CORRUPTED_DATA;
586 for (i=0;i<count-1;i++)
587 size[i] = size[count-1];
588 } else if(size[count-1] > last_size)
589 return OPUS_CORRUPTED_DATA;
592 /* Because it's not encoded explicitly, it's possible the size of the
593 last packet (or all the packets, for the CBR case) is larger than
596 if (last_size > 1275)
597 return OPUS_CORRUPTED_DATA;
598 size[count-1] = last_size;
603 for (i=0;i<count;i++)
614 *payload_offset = data-data0;
/* Parses a regular (not self-delimited) Opus packet into its frames.
   data, len       packet bytes and length.
   out_toc         forwarded to opus_packet_parse_impl().
   frames          forwarded to opus_packet_parse_impl(); up to 48 entries.
   size            receives the byte size of each frame; up to 48 entries.
   payload_offset  forwarded to opus_packet_parse_impl().
   Returns the result of opus_packet_parse_impl() called with
   self_delimited set to 0. */
int opus_packet_parse(const unsigned char *data, int len,
      unsigned char *out_toc, const unsigned char *frames[48],
      short size[48], int *payload_offset)
{
    const int self_delimited = 0;

    return opus_packet_parse_impl(data, len, self_delimited,
                                  out_toc, frames, size, payload_offset);
}
/* Decodes a whole Opus packet (possibly several frames) into `pcm` using the
   native sample type opus_val16.
   NOTE(review): two signatures are shown (opus_decode and opus_decode_float);
   they are presumably alternatives under a preprocessor conditional that is
   not visible in this listing -- confirm.
   st          decoder state; mode, bandwidth, frame_size and stream_channels
               are refreshed from the first byte of every packet.
   data, len   packet bytes and length; len == 0 or data == NULL takes the
               data == NULL path of opus_decode_frame().
   pcm         interleaved output buffer.
   frame_size  room available in `pcm`, in samples per channel.
   decode_fec  forwarded to opus_decode_frame() for every frame.
   NOTE(review): local declarations, error returns, the advance of `data` and
   `nb_samples` between frames, and the final return are not visible. */
628 int opus_decode(OpusDecoder *st, const unsigned char *data,
629 int len, opus_val16 *pcm, int frame_size, int decode_fec)
631 int opus_decode_float(OpusDecoder *st, const unsigned char *data,
632 int len, opus_val16 *pcm, int frame_size, int decode_fec)
638 /* 48 x 2.5 ms = 120 ms */
/* No payload: let opus_decode_frame() fill the buffer on its own. */
640 if (len==0 || data==NULL)
641 return opus_decode_frame(st, NULL, 0, pcm, frame_size, 0);
/* Everything below is derived from the first byte of the packet. */
644 st->mode = opus_packet_get_mode(data);
645 st->bandwidth = opus_packet_get_bandwidth(data);
646 st->frame_size = opus_packet_get_samples_per_frame(data, st->Fs);
647 st->stream_channels = opus_packet_get_nb_channels(data);
/* Split the packet; frame pointers are not requested (NULL), only sizes. */
649 count = opus_packet_parse_impl(data, len, 0, &toc, NULL, size, &offset);
/* All frames together must fit in the caller's buffer. */
654 if (count*st->frame_size > frame_size)
657 for (i=0;i<count;i++)
660 ret = opus_decode_frame(st, data, size[i], pcm, frame_size-nb_samples, decode_fec);
/* Move the output pointer past the samples just written (interleaved). */
664 pcm += ret*st->channels;
/* Float-output wrapper, compiled only when DISABLE_FLOAT_API is not defined:
   decodes into a temporary 16-bit buffer with opus_decode() and converts
   each sample to float by multiplying by 1/32768.
   When opus_decode() returns a negative value the conversion loop does not
   execute.
   NOTE(review): the declarations of ret and i and the return statement are
   not visible in this listing -- confirm that `ret` is what is returned. */
672 #ifndef DISABLE_FLOAT_API
673 int opus_decode_float(OpusDecoder *st, const unsigned char *data,
674 int len, float *pcm, int frame_size, int decode_fec)
676 VARDECL(opus_int16, out);
/* Temporary buffer sized for frame_size samples on every channel. */
680 ALLOC(out, frame_size*st->channels, opus_int16);
682 ret = opus_decode(st, data, len, out, frame_size, decode_fec);
/* ret is the number of samples per channel, so ret*channels values are
   converted. */
685 for (i=0;i<ret*st->channels;i++)
686 pcm[i] = (1./32768.)*(out[i]);
/* 16-bit-output wrapper: decodes into a temporary float buffer with
   opus_decode_float() and converts each sample with FLOAT2INT16().
   When opus_decode_float() returns a negative value the conversion loop does
   not execute.
   NOTE(review): the declarations of out, ret and i and the return statement
   are not visible in this listing -- confirm that `ret` is what is
   returned. */
694 int opus_decode(OpusDecoder *st, const unsigned char *data,
695 int len, opus_int16 *pcm, int frame_size, int decode_fec)
/* Temporary buffer sized for frame_size samples on every channel. */
701 ALLOC(out, frame_size*st->channels, float);
703 ret = opus_decode_float(st, data, len, out, frame_size, decode_fec);
706 for (i=0;i<ret*st->channels;i++)
707 pcm[i] = FLOAT2INT16(out[i]);
/* Variadic control interface of the decoder.
   request  selects the operation; the only case visible here is
            OPUS_GET_BANDWIDTH_REQUEST, which expects one `int *` argument
            and stores st->bandwidth through it.
   Requests that match no case are reported on stderr.
   NOTE(review): the declaration of `ap`, the switch header, va_end() and the
   return values are not visible in this listing -- confirm what is returned
   for known and unknown requests. */
714 int opus_decoder_ctl(OpusDecoder *st, int request, ...)
718 va_start(ap, request);
722 case OPUS_GET_BANDWIDTH_REQUEST:
724 int *value = va_arg(ap, int*);
725 *value = st->bandwidth;
729 fprintf(stderr, "unknown opus_decoder_ctl() request: %d", request);
/* NOTE(review): only the signature is visible in this listing. Presumably
   this releases a state obtained from opus_decoder_create(), which
   allocates it with malloc() -- confirm against the body. */
737 void opus_decoder_destroy(OpusDecoder *st)
742 int opus_decoder_get_final_range(OpusDecoder *st)
744 return st->rangeFinal;
/* Derives the audio bandwidth from the first byte of a packet (data[0]).
   Three branches are visible:
   - first branch: OPUS_BANDWIDTH_MEDIUMBAND plus bits 5-6 of data[0], with a
     result of MEDIUMBAND replaced by NARROWBAND;
   - branch taken when bits 0x60 are both set: FULLBAND if bit 0x10 is set,
     otherwise SUPERWIDEBAND;
   - last branch: OPUS_BANDWIDTH_NARROWBAND plus bits 5-6 of data[0].
   The additions assume the OPUS_BANDWIDTH_* constants are consecutive.
   NOTE(review): the declaration of `bandwidth`, the first condition and the
   return statement are not visible in this listing -- confirm. */
747 int opus_packet_get_bandwidth(const unsigned char *data)
752 bandwidth = OPUS_BANDWIDTH_MEDIUMBAND + ((data[0]>>5)&0x3);
753 if (bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
754 bandwidth = OPUS_BANDWIDTH_NARROWBAND;
755 } else if ((data[0]&0x60) == 0x60)
757 bandwidth = (data[0]&0x10) ? OPUS_BANDWIDTH_FULLBAND : OPUS_BANDWIDTH_SUPERWIDEBAND;
760 bandwidth = OPUS_BANDWIDTH_NARROWBAND + ((data[0]>>5)&0x3);
/* Returns the number of samples per channel in one frame of a packet, at
   sampling rate Fs, from the first (TOC) byte of the packet.
   data  packet bytes; only data[0] is read.
   Fs    sampling rate in Hz.
   TOC layout per RFC 6716 section 3.1:
   - top bit set (CELT-only): bits 3-4 select 2.5, 5, 10 or 20 ms;
   - bits 5-6 both set (hybrid): bit 3 selects 20 ms (set) or 10 ms (clear);
   - otherwise (SILK-only): bits 3-4 select 10, 20, 40 or 60 ms. */
int opus_packet_get_samples_per_frame(const unsigned char *data, int Fs)
{
    int audiosize;

    if (data[0]&0x80)
    {
        /* Fs/400 is 2.5 ms; each step of the 2-bit code doubles it. */
        audiosize = ((data[0]>>3)&0x3);
        audiosize = (Fs<<audiosize)/400;
    } else if ((data[0]&0x60) == 0x60)
    {
        audiosize = (data[0]&0x08) ? Fs/50 : Fs/100;
    } else {
        audiosize = ((data[0]>>3)&0x3);
        /* Code 3 is 60 ms, which does not follow the doubling pattern. */
        if (audiosize == 3)
            audiosize = Fs*60/1000;
        else
            audiosize = (Fs<<audiosize)/100;
    }
    return audiosize;
}
/* Returns the channel count signalled by a packet: 2 when bit 0x4 of the
   first byte is set, 1 otherwise.
   data  packet bytes; only data[0] is read. */
int opus_packet_get_nb_channels(const unsigned char *data)
{
    if (data[0]&0x4)
        return 2;
    return 1;
}
/* Returns the number of frames in a packet.
   The frame-count code is the two low bits of packet[0]; on one path the
   count is read from the low six bits of packet[1], and
   OPUS_CORRUPTED_DATA is returned on a failed check.
   NOTE(review): the declaration of `count`, the length checks and the
   returns for the other count codes are not visible in this listing --
   confirm which codes map to one or two frames and when packet[1] is
   read. */
791 int opus_packet_get_nb_frames(const unsigned char packet[], int len)
796 count = packet[0]&0x3;
802 return OPUS_CORRUPTED_DATA;
804 return packet[1]&0x3F;
807 int opus_decoder_get_nb_samples(const OpusDecoder *dec, const unsigned char packet[], int len)
810 int count = opus_packet_get_nb_frames(packet, len);
811 samples = count*opus_packet_get_samples_per_frame(packet, dec->Fs);
812 /* Can't have more than 120 ms */
813 if (samples*25 > dec->Fs*3)
814 return OPUS_CORRUPTED_DATA;