Fixes 60 ms speech mode
[opus.git] / src / opus_decoder.c
index b06cbaf..14ab6cc 100644 (file)
@@ -32,6 +32,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdarg.h>
+#include "celt.h"
 #include "opus_decoder.h"
 #include "entdec.h"
 #include "modes.h"
@@ -66,23 +67,45 @@ OpusDecoder *opus_decoder_create(int Fs, int channels)
 
        /* Initialize CELT decoder */
        st->celt_dec = celt_decoder_init(st->celt_dec, Fs, channels, NULL);
+    celt_decoder_ctl(st->celt_dec, CELT_SET_SIGNALLING(0));
 
        st->prev_mode = 0;
        return st;
 }
 
-static void smooth_fade(const short *in1, const short *in2, short *out, int overlap, int channels)
+static void smooth_fade(const short *in1, const short *in2, short *out, /* blends in1 into in2 and writes out; all three are indexed [i*channels+c] */
+        int overlap, int channels, const celt_word16 *window, int Fs) /* overlap: samples blended per channel; window is read at a stride of 48000/Fs */
 {
        int i, c;
+       int inc = 48000/Fs; /* step between the window[] entries used; presumably window[] is laid out for 48 kHz -- TODO confirm */
        for (c=0;c<channels;c++)
        {
-               /* FIXME: Make this 16-bit safe, remove division */
                for (i=0;i<overlap;i++)
-                       out[i*channels+c] = (i*in2[i*channels+c] + (overlap-i)*in1[i*channels+c])/overlap;
+               {
+                   celt_word16 w = MULT16_16_Q15(window[i*inc], window[i*inc]); /* w: the window entry multiplied by itself (a Q15 product, judging by the macro name) */
+                   out[i*channels+c] = SHR32(MAC16_16(MULT16_16(w,in2[i*channels+c]), /* in2 weighted by w plus in1 weighted by Q15ONE-w, shifted right by 15 -- assuming the usual MAC16_16/SHR32 meaning */
+                           Q15ONE-w, in1[i*channels+c]), 15);
+               }
        }
 }
 
-int opus_decode(OpusDecoder *st, const unsigned char *data,
+/* Classify a packet as CELT-only, hybrid or SILK-only from the top bits of
+   its first byte. */
+static int opus_packet_get_mode(const unsigned char *data)
+{
+    const int toc = data[0];
+
+    /* Bit 7 set */
+    if (toc&0x80)
+        return MODE_CELT_ONLY;
+    /* Bit 7 clear, bits 5 and 6 both set */
+    if ((toc&0x60) == 0x60)
+        return MODE_HYBRID;
+    /* Every other value */
+    return MODE_SILK_ONLY;
+}
+
+static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
                int len, short *pcm, int frame_size, int decode_fec)
 {
        int i, silk_ret=0, celt_ret=0;
@@ -100,66 +123,40 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
     int celt_to_silk=0;
     short redundant_audio[240*2];
     int c;
+    int F2_5, F5, F10;
+    const celt_word16 *window;
 
+    F10 = st->Fs/100;
+    F5 = F10>>1;
+    F2_5 = F5>>1;
     /* Payloads of 1 (2 including ToC) or 0 trigger the PLC/DTX */
-    if (len<=2)
+    if (len<=1)
        data = NULL;
 
+       audiosize = st->frame_size;
     if (data != NULL)
     {
-        /* Decoding mode/bandwidth/framesize from first byte */
-        if (data[0]&0x80)
-        {
-            mode = MODE_CELT_ONLY;
-            st->bandwidth = BANDWIDTH_MEDIUMBAND + ((data[0]>>5)&0x3);
-            if (st->bandwidth == BANDWIDTH_MEDIUMBAND)
-                st->bandwidth = BANDWIDTH_NARROWBAND;
-            audiosize = ((data[0]>>3)&0x3);
-            audiosize = (st->Fs<<audiosize)/400;
-        } else if ((data[0]&0x60) == 0x60)
-        {
-            mode = MODE_HYBRID;
-            st->bandwidth = (data[0]&0x10) ? BANDWIDTH_FULLBAND : BANDWIDTH_SUPERWIDEBAND;
-            audiosize = (data[0]&0x08) ? st->Fs/50 : st->Fs/100;
-        } else {
-
-            mode = MODE_SILK_ONLY;
-            st->bandwidth = BANDWIDTH_NARROWBAND + ((data[0]>>5)&0x3);
-            audiosize = ((data[0]>>3)&0x3);
-            if (audiosize == 3)
-                audiosize = st->Fs*60/1000;
-            else
-                audiosize = (st->Fs<<audiosize)/100;
-        }
-        st->stream_channels = (data[0]&0x4) ? 2 : 1;
-        /*printf ("%d %d %d\n", st->mode, st->bandwidth, audiosize);*/
-
-        len -= 1;
-        data += 1;
+       mode = st->mode;
         ec_dec_init(&dec,(unsigned char*)data,len);
     } else {
-        audiosize = frame_size;
         mode = st->prev_mode;
     }
 
     if (st->stream_channels > st->channels)
         return OPUS_CORRUPTED_DATA;
 
-    if (st->stream_channels == 2 && mode != MODE_CELT_ONLY)
-        return OPUS_UNIMPLEMENTED;
-
     if (data!=NULL && !st->prev_redundancy && mode != st->prev_mode && st->prev_mode > 0
                && !(mode == MODE_SILK_ONLY && st->prev_mode == MODE_HYBRID)
                && !(mode == MODE_HYBRID && st->prev_mode == MODE_SILK_ONLY))
     {
        transition = 1;
        if (mode == MODE_CELT_ONLY)
-           opus_decode(st, NULL, 0, pcm_transition, IMAX(st->Fs/100, audiosize), 0);
+           opus_decode_frame(st, NULL, 0, pcm_transition, IMAX(F10, audiosize), 0);
     }
     if (audiosize > frame_size)
     {
         fprintf(stderr, "PCM buffer too small: %d vs %d (mode = %d)\n", audiosize, frame_size, mode);
-        return -1;
+        return OPUS_BAD_ARG;
     } else {
         frame_size = audiosize;
     }
@@ -190,6 +187,7 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
             /* Hybrid mode */
             DecControl.internalSampleRate = 16000;
         }
+        DecControl.nChannels = st->channels;
 
         lost_flag = data == NULL ? 1 : 2 * decode_fec;
         decoded_samples = 0;
@@ -197,12 +195,12 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
             /* Call SILK decoder */
             int first_frame = decoded_samples == 0;
             silk_ret = SKP_Silk_SDK_Decode( st->silk_dec, &DecControl, 
-                lost_flag, first_frame, &dec, len, pcm_ptr, &silk_frame_size );
+                lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size );
             if( silk_ret ) {
                 fprintf (stderr, "SILK decode error\n");
                 /* Handle error */
             }
-            pcm_ptr += silk_frame_size;
+            pcm_ptr += silk_frame_size * st->channels;
             decoded_samples += silk_frame_size;
         } while( decoded_samples < frame_size );
     } else {
@@ -223,11 +221,14 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
             else
                redundancy_bytes = len - ((ec_tell(&dec)+7)>>3);
             len -= redundancy_bytes;
+            if (len<0)
+                return CELT_CORRUPTED_DATA;
             /* Shrink decoder because of raw bits */
             dec.storage -= redundancy_bytes;
         }
-        start_band = 17;
     }
+    if (mode != MODE_CELT_ONLY)
+       start_band = 17;
 
     if (mode != MODE_SILK_ONLY)
     {
@@ -256,12 +257,12 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
         transition = 0;
 
     if (transition && mode != MODE_CELT_ONLY)
-        opus_decode(st, NULL, 0, pcm_transition, IMAX(st->Fs/100, audiosize), 0);
+        opus_decode_frame(st, NULL, 0, pcm_transition, IMAX(F10, audiosize), 0);
 
     /* 5 ms redundant frame for CELT->SILK*/
     if (redundancy && celt_to_silk)
     {
-        celt_decode(st->celt_dec, data+len, redundancy_bytes, redundant_audio, st->Fs/200);
+        celt_decode(st->celt_dec, data+len, redundancy_bytes, redundant_audio, F5);
         celt_decoder_ctl(st->celt_dec, CELT_RESET_STATE);
     }
 
@@ -279,41 +280,40 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
             pcm[i] = ADD_SAT16(pcm[i], pcm_celt[i]);
     }
 
+
+    {
+        const CELTMode *celt_mode;
+        celt_decoder_ctl(st->celt_dec, CELT_GET_MODE(&celt_mode));
+        window = celt_mode->window;
+    }
+
     /* 5 ms redundant frame for SILK->CELT */
     if (redundancy && !celt_to_silk)
     {
-        int N2, N4;
-        N2 = st->Fs/200;
-        N4 = st->Fs/400;
         celt_decoder_ctl(st->celt_dec, CELT_RESET_STATE);
         celt_decoder_ctl(st->celt_dec, CELT_SET_START_BAND(0));
 
-        celt_decode(st->celt_dec, data+len, redundancy_bytes, redundant_audio, N2);
-        smooth_fade(pcm+st->channels*(frame_size-N4), redundant_audio+st->channels*N4,
-                       pcm+st->channels*(frame_size-N4), N4, st->channels);
+        celt_decode(st->celt_dec, data+len, redundancy_bytes, redundant_audio, F5);
+        smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5,
+                       pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs);
     }
     if (redundancy && celt_to_silk)
     {
-        int N2, N4;
-        N2 = st->Fs/200;
-        N4 = st->Fs/400;
-
         for (c=0;c<st->channels;c++)
         {
-            for (i=0;i<N4;i++)
+            for (i=0;i<F2_5;i++)
                 pcm[st->channels*i+c] = redundant_audio[st->channels*i];
         }
-        smooth_fade(redundant_audio+st->channels*N4, pcm+st->channels*N4, pcm+st->channels*N4, N4, st->channels);
+        smooth_fade(redundant_audio+st->channels*F2_5, pcm+st->channels*F2_5,
+                pcm+st->channels*F2_5, F2_5, st->channels, window, st->Fs);
     }
     if (transition)
     {
-       int plc_length, overlap;
-       plc_length = IMIN(audiosize, 10+st->Fs/400);
-       for (i=0;i<plc_length;i++)
+       for (i=0;i<F2_5;i++)
                pcm[i] = pcm_transition[i];
-
-       overlap = IMIN(st->Fs/400, IMAX(0, audiosize-plc_length));
-       smooth_fade(pcm_transition+plc_length, pcm+plc_length, pcm+plc_length, overlap, st->channels);
+       if (audiosize >= F5)
+           smooth_fade(pcm_transition+F2_5, pcm+F2_5, pcm+F2_5, F2_5,
+                   st->channels, window, st->Fs);
     }
 #if OPUS_TEST_RANGE_CODER_STATE
     st->rangeFinal = dec.rng;
@@ -325,6 +325,137 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
 
 }
 
+/* Parse a 1- or 2-byte frame length; stores it in *size and returns the bytes
+   consumed, or stores -1 and returns -1 when too few bytes are available. */
+static int parse_size(const unsigned char *data, int len, short *size)
+{
+       int used;
+       if (len>=1 && data[0]<252) {
+               /* Values below 252 are the length itself */
+               *size = data[0];
+               used = 1;
+       } else if (len>=2) {
+               /* First byte is 252..255: length is 4*second + first */
+               *size = 4*data[1] + data[0];
+               used = 2;
+       } else {
+               *size = -1;
+               used = -1;
+       }
+       return used;
+}
+
+/* Decode one Opus packet (TOC byte followed by one or more frames) into pcm,
+   which is interleaved and has room for frame_size samples per channel.
+   Returns the number of samples per channel decoded, or an error code. */
+int opus_decode(OpusDecoder *st, const unsigned char *data,
+               int len, short *pcm, int frame_size, int decode_fec)
+{
+       int i, bytes, count, nb_samples=0;
+       unsigned char ch, toc;
+       /* 48 x 2.5 ms = 120 ms */
+       short size[48];
+       if (len==0 || data==NULL)
+           return opus_decode_frame(st, NULL, 0, pcm, frame_size, 0);
+       else if (len<0)
+               return CELT_BAD_ARG;
+       st->mode = opus_packet_get_mode(data);
+       st->bandwidth = opus_packet_get_bandwidth(data);
+       st->frame_size = opus_packet_get_samples_per_frame(data, st->Fs);
+       st->stream_channels = opus_packet_get_nb_channels(data);
+       toc = *data++;
+       len--;
+       switch (toc&0x3)
+       {
+       /* One frame */
+       case 0:
+               count=1;
+               size[0] = len;
+               break;
+               /* Two CBR frames */
+       case 1:
+               count=2;
+               if (len&0x1)
+                       return OPUS_CORRUPTED_DATA;
+               size[0] = size[1] = len/2;
+               break;
+               /* Two VBR frames */
+       case 2:
+               count = 2;
+               bytes = parse_size(data, len, size);
+               len -= bytes;
+               if (size[0]<0 || size[0] > len)
+                       return OPUS_CORRUPTED_DATA;
+               data += bytes;
+               size[1] = len-size[0];
+               break;
+               /* Multiple CBR/VBR frames (from 0 to 120 ms) */
+       case 3:
+               if (len<1)
+                       return OPUS_CORRUPTED_DATA;
+               /* Number of frames encoded in bits 0 to 5 */
+               ch = *data++;
+               count = ch&0x3F;
+               if (st->frame_size*count*25 > 3*st->Fs)
+                   return OPUS_CORRUPTED_DATA;
+               len--;
+               /* Padding bit */
+               if (ch&0x40)
+               {
+                       int padding=0, p;
+                       do {
+                               if (len<=0)
+                                       return OPUS_CORRUPTED_DATA;
+                               p = *data++;
+                               len--;
+                               padding += p==255 ? 254: p;
+                       } while (p==255);
+                       len -= padding;
+               }
+               if (len<0)
+                       return OPUS_CORRUPTED_DATA;
+               /* Bit 7 is VBR flag (bit 6 is ignored) */
+               if (ch&0x80)
+               {
+                       /* VBR case */
+                       int last_size=len;
+                       for (i=0;i<count-1;i++)
+                       {
+                               bytes = parse_size(data, len, size+i);
+                               len -= bytes;
+                               if (size[i]<0 || size[i] > len)
+                                       return OPUS_CORRUPTED_DATA;
+                               data += bytes;
+                               last_size -= bytes+size[i];
+                       }
+                       if (last_size<0)
+                               return OPUS_CORRUPTED_DATA;
+                       if (count)
+                               size[count-1]=last_size;
+               } else {
+                       /* CBR case */
+                       int sz = count != 0 ? len/count : 0;
+                       if (sz*count!=len)
+                               return OPUS_CORRUPTED_DATA;
+                       for (i=0;i<count;i++)
+                               size[i] = sz;
+               }
+               break;
+       }
+       if (count*st->frame_size > frame_size)
+               return OPUS_BAD_ARG;
+       for (i=0;i<count;i++)
+       {
+               /* Hand each frame its own size[i] bytes, not the remaining packet length */
+               int ret = opus_decode_frame(st, data, size[i], pcm, frame_size-nb_samples, decode_fec);
+               if (ret<0)
+                       return ret;
+               data += size[i];
+               pcm += ret*st->channels; /* pcm is interleaved: skip ret samples for every channel */
+               nb_samples += ret;
+       }
+       return nb_samples;
+}
 int opus_decoder_ctl(OpusDecoder *st, int request, ...)
 {
     va_list ap;
@@ -357,6 +488,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...)
     }
 
     va_end(ap);
+    return OPUS_OK;
 }
 
 void opus_decoder_destroy(OpusDecoder *st)
@@ -370,3 +502,77 @@ int opus_decoder_get_final_range(OpusDecoder *st)
     return st->rangeFinal;
 }
 #endif
+
+
+/* Read the audio bandwidth (a BANDWIDTH_* value) signalled by the first byte
+   of an Opus packet. */
+int opus_packet_get_bandwidth(const unsigned char *data)
+{
+    const int toc = data[0];
+    const int bw_bits = (toc>>5)&0x3;
+
+    if (!(toc&0x80))
+    {
+        /* Bits 5-6 both set: bit 4 picks between two values; else offset from narrowband */
+        if ((toc&0x60) == 0x60)
+            return (toc&0x10) ? BANDWIDTH_FULLBAND : BANDWIDTH_SUPERWIDEBAND;
+        return BANDWIDTH_NARROWBAND + bw_bits;
+    }
+    /* Bit 7 set: offset from mediumband, with an offset of 0 mapped to narrowband */
+    return bw_bits ? BANDWIDTH_MEDIUMBAND + bw_bits : BANDWIDTH_NARROWBAND;
+}
+
+/* Samples per channel in one frame of a packet, computed from the packet's
+   first byte and the sampling rate Fs. */
+int opus_packet_get_samples_per_frame(const unsigned char *data, int Fs)
+{
+    const int toc = data[0];
+    const int shift = (toc>>3)&0x3;
+
+    /* Bit 7 set: Fs/400 scaled up by 2^shift */
+    if (toc&0x80)
+        return (Fs<<shift)/400;
+
+    /* Bit 7 clear, bits 5-6 both set: bit 3 picks Fs/50 over Fs/100 */
+    if ((toc&0x60) == 0x60)
+        return (toc&0x08) ? Fs/50 : Fs/100;
+
+    /* Otherwise Fs/100 scaled up by 2^shift, except shift 3 which means 60/1000 of Fs */
+    if (shift == 3)
+        return Fs*60/1000;
+    return (Fs<<shift)/100;
+}
+
+int opus_packet_get_nb_channels(const unsigned char *data)
+{
+    return 1 + ((data[0]>>2)&0x1); /* bit 2 of the first byte: set means 2 channels, clear means 1 */
+}
+
+/* Count the frames in a packet of len bytes, using its first one or two bytes. */
+int opus_packet_get_nb_frames(const unsigned char packet[], int len)
+{
+       if (len<1)
+               return OPUS_BAD_ARG;
+       switch (packet[0]&0x3) {
+       case 0:
+               return 1;
+       case 3:
+               /* The count sits in the low 6 bits of the second byte */
+               return (len<2) ? OPUS_CORRUPTED_DATA : (packet[1]&0x3F);
+       default:
+               return 2;
+       }
+}
+
+/* Samples per channel in a whole packet (frame count times samples per frame at dec->Fs).
+   Returns OPUS_CORRUPTED_DATA beyond 120 ms, or the error from opus_packet_get_nb_frames(). */
+int opus_decoder_get_nb_samples(const OpusDecoder *dec, const unsigned char packet[], int len)
+{
+       int samples, count = opus_packet_get_nb_frames(packet, len);
+       if (count<0) /* stop here: packet[0] may not exist and an error code must not be scaled */
+               return count;
+       samples = count*opus_packet_get_samples_per_frame(packet, dec->Fs);
+       /* Can't have more than 120 ms */
+       return (samples*25 > dec->Fs*3) ? OPUS_CORRUPTED_DATA : samples;
+}
+