decoder excitation now in 16-bit precision (was 32), which saves quite a bit
[speexdsp.git] / src / speexdec.c
index 898f098..2a3876a 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002 Jean-Marc Valin 
+/* Copyright (C) 2002-2006 Jean-Marc Valin 
    File: speexdec.c
 
    Redistribution and use in source and binary forms, with or without
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
 #include <stdio.h>
 #if !defined WIN32 && !defined _WIN32
 #include <unistd.h>
 #include <getopt.h>
 #endif
+#ifndef HAVE_GETOPT_LONG
+#include "getopt_win.h"
+#endif
 #include <stdlib.h>
 #include <string.h>
 
-#include "speex.h"
-#include "ogg/ogg.h"
+#include <speex/speex.h>
+#include <ogg/ogg.h>
 
 #if defined WIN32 || defined _WIN32
 #include <windows.h>
 #include <io.h>
 #include <fcntl.h>
 #endif
+#include <math.h>
+
+#ifdef __MINGW32__
+#include "wave_out.c"
+#endif
 
 #ifdef HAVE_SYS_SOUNDCARD_H
 #include <sys/soundcard.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <sys/ioctl.h>
+
+#elif defined HAVE_SYS_AUDIOIO_H
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/audioio.h>
+#ifndef AUDIO_ENCODING_SLINEAR
+#define AUDIO_ENCODING_SLINEAR AUDIO_ENCODING_LINEAR /* Solaris */
+#endif
+
 #endif
 
 #include <string.h>
 #include "wav_io.h"
-#include "speex_header.h"
-#include "speex_stereo.h"
-#include "speex_callbacks.h"
-#include "misc.h"
+#include <speex/speex_header.h>
+#include <speex/speex_stereo.h>
+#include <speex/speex_callbacks.h>
+#include "wav_io.h"
 
 #define MAX_FRAME_SIZE 2000
 
@@ -75,18 +97,45 @@ static void print_comments(char *comments, int length)
 {
    char *c=comments;
    int len, i, nb_fields;
-
+   char *end;
+   
+   if (length<8)
+   {
+      fprintf (stderr, "Invalid/corrupted comments\n");
+      return;
+   }
+   end = c+length;
    len=readint(c, 0);
    c+=4;
+   if (c+len>end)
+   {
+      fprintf (stderr, "Invalid/corrupted comments\n");
+      return;
+   }
    fwrite(c, 1, len, stderr);
    c+=len;
    fprintf (stderr, "\n");
+   if (c+4>end)
+   {
+      fprintf (stderr, "Invalid/corrupted comments\n");
+      return;
+   }
    nb_fields=readint(c, 0);
    c+=4;
    for (i=0;i<nb_fields;i++)
    {
+      if (c+4>end)
+      {
+         fprintf (stderr, "Invalid/corrupted comments\n");
+         return;
+      }
       len=readint(c, 0);
       c+=4;
+      if (c+len>end)
+      {
+         fprintf (stderr, "Invalid/corrupted comments\n");
+         return;
+      }
       fwrite(c, 1, len, stderr);
       c+=len;
       fprintf (stderr, "\n");
@@ -108,7 +157,7 @@ FILE *out_file_open(char *outFile, int rate, int *channels)
          exit(1);         
       }
 
-      format=AFMT_S16_LE;
+      format=AFMT_S16_NE;
       if (ioctl(audio_fd, SNDCTL_DSP_SETFMT, &format)==-1)
       {
          perror("SNDCTL_DSP_SETFMT");
@@ -139,6 +188,32 @@ FILE *out_file_open(char *outFile, int rate, int *channels)
          exit(1);
       }
       fout = fdopen(audio_fd, "w");
+#elif defined HAVE_SYS_AUDIOIO_H
+      audio_info_t info;
+      int audio_fd;
+      
+      audio_fd = open("/dev/audio", O_WRONLY);
+      if (audio_fd<0)
+      {
+         perror("Cannot open /dev/audio");
+         exit(1);
+      }
+
+      AUDIO_INITINFO(&info);
+#ifdef AUMODE_PLAY    /* NetBSD/OpenBSD */
+      info.mode = AUMODE_PLAY;
+#endif
+      info.play.encoding = AUDIO_ENCODING_SLINEAR;
+      info.play.precision = 16;
+      info.play.sample_rate = rate;
+      info.play.channels = *channels;
+      
+      if (ioctl(audio_fd, AUDIO_SETINFO, &info) < 0)
+      {
+         perror ("AUDIO_SETINFO");
+         exit(1);
+      }
+      fout = fdopen(audio_fd, "w");
 #elif defined WIN32 || defined _WIN32
       {
          unsigned int speex_channels = *channels;
@@ -162,11 +237,7 @@ FILE *out_file_open(char *outFile, int rate, int *channels)
       }
       else 
       {
-#if defined WIN32 || defined _WIN32
          fout = fopen(outFile, "wb");
-#else
-         fout = fopen(outFile, "w");
-#endif
          if (!fout)
          {
             perror(outFile);
@@ -196,8 +267,8 @@ void usage()
    printf ("  (nothing)            Will be played to soundcard\n");
    printf ("\n");  
    printf ("Options:\n");
-   printf (" --enh                 Enable perceptual enhancement\n");
-   printf (" --no-enh              Disable perceptual enhancement (default FOR NOW)\n");
+   printf (" --enh                 Enable perceptual enhancement (default)\n");
+   printf (" --no-enh              Disable perceptual enhancement\n");
    printf (" --force-nb            Force decoding in narrowband\n");
    printf (" --force-wb            Force decoding in wideband\n");
    printf (" --force-uwb           Force decoding in ultra-wideband\n");
@@ -208,8 +279,8 @@ void usage()
    printf (" -V                    Verbose mode (show bit-rate)\n"); 
    printf (" -h, --help            This help\n");
    printf (" -v, --version         Version information\n");
-   printf (" --pf                  Deprecated, use --pf instead\n");
-   printf (" --no-pf               Deprecated, use --no-pf instead\n");
+   printf (" --pf                  Deprecated, use --enh instead\n");
+   printf (" --no-pf               Deprecated, use --no-enh instead\n");
    printf ("\n");
    printf ("More information is available from the Speex site: http://www.speex.org\n");
    printf ("\n");
@@ -218,20 +289,20 @@ void usage()
 
 void version()
 {
-   printf ("speexdec (Speex decoder) version " VERSION " (compiled " __DATE__ ")\n");
-   printf ("Copyright (C) 2002 Jean-Marc Valin\n");
+   printf ("speexdec (Speex decoder) version " SPEEX_VERSION " (compiled " __DATE__ ")\n");
+   printf ("Copyright (C) 2002-2006 Jean-Marc Valin\n");
 }
 
 void version_short()
 {
-   printf ("speexdec version " VERSION "\n");
-   printf ("Copyright (C) 2002 Jean-Marc Valin\n");
+   printf ("speexdec version " SPEEX_VERSION "\n");
+   printf ("Copyright (C) 2002-2006 Jean-Marc Valin\n");
 }
 
-static void *process_header(ogg_packet *op, int enh_enabled, int *frame_size, int *rate, int *nframes, int forceMode, int *channels, SpeexStereoState *stereo)
+static void *process_header(ogg_packet *op, int enh_enabled, int *frame_size, int *rate, int *nframes, int forceMode, int *channels, SpeexStereoState *stereo, int *extra_headers, int quiet)
 {
    void *st;
-   SpeexMode *mode;
+   const SpeexMode *mode;
    SpeexHeader *header;
    int modeID;
    SpeexCallback callback;
@@ -242,9 +313,9 @@ static void *process_header(ogg_packet *op, int enh_enabled, int *frame_size, in
       fprintf (stderr, "Cannot read header\n");
       return NULL;
    }
-   if (header->mode >= SPEEX_NB_MODES)
+   if (header->mode >= SPEEX_NB_MODES || header->mode<0)
    {
-      fprintf (stderr, "Mode number %d does not (any longer) exist in this version\n", 
+      fprintf (stderr, "Mode number %d does not (yet/any longer) exist in this version\n", 
                header->mode);
       return NULL;
    }
@@ -252,8 +323,15 @@ static void *process_header(ogg_packet *op, int enh_enabled, int *frame_size, in
    modeID = header->mode;
    if (forceMode!=-1)
       modeID = forceMode;
-   mode = speex_mode_list[modeID];
+
+   mode = speex_lib_get_mode (modeID);
    
+   if (header->speex_version_id > 1)
+   {
+      fprintf (stderr, "This file was encoded with Speex bit-stream version %d, which I don't know how to decode\n", header->speex_version_id);
+      return NULL;
+   }
+
    if (mode->bitstream_version < header->mode_bitstream_version)
    {
       fprintf (stderr, "The file was encoded with a newer version of Speex. You need to upgrade in order to play it.\n");
@@ -266,14 +344,21 @@ static void *process_header(ogg_packet *op, int enh_enabled, int *frame_size, in
    }
    
    st = speex_decoder_init(mode);
+   if (!st)
+   {
+      fprintf (stderr, "Decoder initialization failed.\n");
+      return NULL;
+   }
    speex_decoder_ctl(st, SPEEX_SET_ENH, &enh_enabled);
    speex_decoder_ctl(st, SPEEX_GET_FRAME_SIZE, frame_size);
 
-   callback.callback_id = SPEEX_INBAND_STEREO;
-   callback.func = speex_std_stereo_request_handler;
-   callback.data = stereo;
-   speex_decoder_ctl(st, SPEEX_SET_HANDLER, &callback);
-   
+   if (!(*channels==1))
+   {
+      callback.callback_id = SPEEX_INBAND_STEREO;
+      callback.func = speex_std_stereo_request_handler;
+      callback.data = stereo;
+      speex_decoder_ctl(st, SPEEX_SET_HANDLER, &callback);
+   }
    if (!*rate)
       *rate = header->rate;
    /* Adjust rate if --force-* options are used */
@@ -292,15 +377,25 @@ static void *process_header(ogg_packet *op, int enh_enabled, int *frame_size, in
    if (*channels==-1)
       *channels = header->nb_channels;
    
-   fprintf (stderr, "Decoding %d Hz audio using %s mode", 
-            *rate, mode->modeName);
+   if (!quiet)
+   {
+      fprintf (stderr, "Decoding %d Hz audio using %s mode", 
+               *rate, mode->modeName);
 
-   if (header->vbr)
-      fprintf (stderr, " (VBR)\n");
-   else
-      fprintf(stderr, "\n");
-   /*fprintf (stderr, "Decoding %d Hz audio at %d bps using %s mode\n", 
-    *rate, mode->bitrate, mode->modeName);*/
+      if (*channels==1)
+         fprintf (stderr, " (mono");
+      else
+         fprintf (stderr, " (stereo");
+      
+      if (header->vbr)
+         fprintf (stderr, ", VBR)\n");
+      else
+         fprintf(stderr, ")\n");
+      /*fprintf (stderr, "Decoding %d Hz audio at %d bps using %s mode\n", 
+       *rate, mode->bitrate, mode->modeName);*/
+   }
+
+   *extra_headers = header->extra_headers;
 
    free(header);
    return st;
@@ -313,15 +408,19 @@ int main(int argc, char **argv)
    char *inFile, *outFile;
    FILE *fin, *fout=NULL;
    short out[MAX_FRAME_SIZE];
-   float output[MAX_FRAME_SIZE];
+   short output[MAX_FRAME_SIZE];
    int frame_size=0;
    void *st=NULL;
    SpeexBits bits;
    int packet_count=0;
    int stream_init = 0;
+   int quiet = 0;
+   ogg_int64_t page_granule=0, last_granule=0;
+   int skip_samples=0, page_nb_packets;
    struct option long_options[] =
    {
       {"help", no_argument, NULL, 0},
+      {"quiet", no_argument, NULL, 0},
       {"version", no_argument, NULL, 0},
       {"version-short", no_argument, NULL, 0},
       {"enh", no_argument, NULL, 0},
@@ -352,8 +451,11 @@ int main(int argc, char **argv)
    SpeexStereoState stereo = SPEEX_STEREO_STATE_INIT;
    int channels=-1;
    int rate=0;
+   int extra_headers;
+   int wav_format=0;
+   int lookahead;
 
-   enh_enabled = 0;
+   enh_enabled = 1;
 
    /*Process options*/
    while(1)
@@ -370,6 +472,9 @@ int main(int argc, char **argv)
          {
             usage();
             exit(0);
+         } else if (strcmp(long_options[option_index].name,"quiet")==0)
+         {
+            quiet = 1;
          } else if (strcmp(long_options[option_index].name,"version")==0)
          {
             version();
@@ -443,6 +548,9 @@ int main(int argc, char **argv)
       outFile=argv[optind+1];
    else
       outFile = "";
+   wav_format = strlen(outFile)>=4 && (
+                                       strcmp(outFile+strlen(outFile)-4,".wav")==0
+                                       || strcmp(outFile+strlen(outFile)-4,".WAV")==0);
    /*Open input file*/
    if (strcmp(inFile, "-")==0)
    {
@@ -453,11 +561,7 @@ int main(int argc, char **argv)
    }
    else 
    {
-#if defined WIN32 || defined _WIN32
       fin = fopen(inFile, "rb");
-#else
-      fin = fopen(inFile, "r");
-#endif
       if (!fin)
       {
          perror(inFile);
@@ -485,35 +589,54 @@ int main(int argc, char **argv)
       /*Loop for all complete pages we got (most likely only one)*/
       while (ogg_sync_pageout(&oy, &og)==1)
       {
+         int packet_no;
          if (stream_init == 0) {
             ogg_stream_init(&os, ogg_page_serialno(&og));
             stream_init = 1;
          }
          /*Add page to the bitstream*/
          ogg_stream_pagein(&os, &og);
+         page_granule = ogg_page_granulepos(&og);
+         page_nb_packets = ogg_page_packets(&og);
+         if (page_granule>0 && frame_size)
+         {
+            /* FIXME: shift the granule values if --force-* is specified */
+            skip_samples = page_nb_packets*frame_size*nframes - (page_granule-last_granule);
+            if (ogg_page_eos(&og))
+               skip_samples = -skip_samples;
+            /*else if (!ogg_page_bos(&og))
+               skip_samples = 0;*/
+         } else
+         {
+            skip_samples = 0;
+         }
+         /*printf ("page granulepos: %d %d %d\n", skip_samples, page_nb_packets, (int)page_granule);*/
+         last_granule = page_granule;
          /*Extract all available packets*/
+         packet_no=0;
          while (!eos && ogg_stream_packetout(&os, &op)==1)
          {
             /*If first packet, process as Speex header*/
             if (packet_count==0)
             {
-               st = process_header(&op, enh_enabled, &frame_size, &rate, &nframes, forceMode, &channels, &stereo);
+               st = process_header(&op, enh_enabled, &frame_size, &rate, &nframes, forceMode, &channels, &stereo, &extra_headers, quiet);
+               speex_decoder_ctl(st, SPEEX_GET_LOOKAHEAD, &lookahead);
                if (!nframes)
                   nframes=1;
                if (!st)
                   exit(1);
                fout = out_file_open(outFile, rate, &channels);
 
-            } else if (packet_count==1){
-               print_comments((char*)op.packet, op.bytes);
-               /*
-               fprintf (stderr, "File comments: ");
-               fwrite(op.packet, 1, op.bytes, stderr);
-               fprintf (stderr, "\n");
-               */
+            } else if (packet_count==1)
+            {
+               if (!quiet)
+                  print_comments((char*)op.packet, op.bytes);
+            } else if (packet_count<=1+extra_headers)
+            {
+               /* Ignore extra headers */
             } else {
-               
                int lost=0;
+               packet_no++;
                if (loss_percent>0 && 100*((float)rand())/RAND_MAX<loss_percent)
                   lost=1;
 
@@ -523,16 +646,32 @@ int main(int argc, char **argv)
 
                /*Copy Ogg packet to Speex bitstream*/
                speex_bits_read_from(&bits, (char*)op.packet, op.bytes);
-               for (j=0;j<nframes;j++)
+               for (j=0;j!=nframes;j++)
                {
+                  int ret;
                   /*Decode frame*/
                   if (!lost)
-                     speex_decode(st, &bits, output);
+                     ret = speex_decode_int(st, &bits, output);
                   else
-                     speex_decode(st, NULL, output);
+                     ret = speex_decode_int(st, NULL, output);
+
+                  /*for (i=0;i<frame_size*channels;i++)
+                    printf ("%d\n", (int)output[i]);*/
 
+                  if (ret==-1)
+                     break;
+                  if (ret==-2)
+                  {
+                     fprintf (stderr, "Decoding error: corrupted stream?\n");
+                     break;
+                  }
+                  if (speex_bits_remaining(&bits)<0)
+                  {
+                     fprintf (stderr, "Decoding overflow: corrupted stream?\n");
+                     break;
+                  }
                   if (channels==2)
-                     speex_decode_stereo(output, frame_size, &stereo);
+                     speex_decode_stereo_int(output, frame_size, &stereo);
 
                   if (print_bitrate) {
                      int tmp;
@@ -541,25 +680,48 @@ int main(int argc, char **argv)
                      fputc (ch, stderr);
                      fprintf (stderr, "Bitrate is use: %d bps     ", tmp);
                   }
-                  /*PCM saturation (just in case)*/
-                  for (i=0;i<frame_size*channels;i++)
-                  {
-                     if (output[i]>32000.0)
-                        output[i]=32000.0;
-                     else if (output[i]<-32000.0)
-                        output[i]=-32000.0;
-                  }
                   /*Convert to short and save to output file*/
-                  for (i=0;i<frame_size*channels;i++)
-                     out[i]=(short)le_short((short)output[i]);
+                 if (strlen(outFile)!=0)
+                  {
+                     for (i=0;i<frame_size*channels;i++)
+                        out[i]=le_short(output[i]);
+                 } else {
+                     for (i=0;i<frame_size*channels;i++)
+                        out[i]=output[i];
+                 }
+                  {
+                     int frame_offset = 0;
+                     int new_frame_size = frame_size;
+                     /*printf ("packet %d %d\n", packet_no, skip_samples);*/
+                     /*fprintf (stderr, "packet %d %d %d\n", packet_no, skip_samples, lookahead);*/
+                     if (packet_no == 1 && j==0 && skip_samples > 0)
+                     {
+                        /*printf ("chopping first packet\n");*/
+                        new_frame_size -= skip_samples+lookahead;
+                        frame_offset = skip_samples+lookahead;
+                     }
+                     if (packet_no == page_nb_packets && skip_samples < 0)
+                     {
+                        int packet_length = nframes*frame_size+skip_samples+lookahead;
+                        new_frame_size = packet_length - j*frame_size;
+                        if (new_frame_size<0)
+                           new_frame_size = 0;
+                        if (new_frame_size>frame_size)
+                           new_frame_size = frame_size;
+                        /*printf ("chopping end: %d %d %d\n", new_frame_size, packet_length, packet_no);*/
+                     }
+                     if (new_frame_size>0)
+                     {  
 #if defined WIN32 || defined _WIN32
-                  if (strlen(outFile)==0)
-                      WIN_Play_Samples (out, sizeof(short) * frame_size*channels);
-                  else
+                        if (strlen(outFile)==0)
+                           WIN_Play_Samples (out+frame_offset*channels, sizeof(short) * new_frame_size*channels);
+                        else
 #endif
-                  fwrite(out, sizeof(short), frame_size*channels, fout);
+                           fwrite(out+frame_offset*channels, sizeof(short), new_frame_size*channels, fout);
                   
-                  audio_size+=sizeof(short)*frame_size*channels;
+                        audio_size+=sizeof(short)*new_frame_size*channels;
+                     }
+                  }
                }
             }
             packet_count++;
@@ -570,7 +732,7 @@ int main(int argc, char **argv)
 
    }
 
-   if (strcmp(outFile+strlen(outFile)-4,".wav")==0 || strcmp(inFile+strlen(inFile)-4,".WAV")==0)
+   if (wav_format)
    {
       if (fseek(fout,4,SEEK_SET)==0)
       {
@@ -592,9 +754,14 @@ int main(int argc, char **argv)
 
    if (st)
       speex_decoder_destroy(st);
+   else 
+   {
+      fprintf (stderr, "This doesn't look like a Speex file\n");
+   }
    speex_bits_destroy(&bits);
+   if (stream_init)
+      ogg_stream_clear(&os);
    ogg_sync_clear(&oy);
-   ogg_stream_clear(&os);
 
 #if defined WIN32 || defined _WIN32
    if (strlen(outFile)==0)
@@ -606,5 +773,5 @@ int main(int argc, char **argv)
    if (fout != NULL)
       fclose(fout);   
 
-   return 1;
+   return 0;
 }