avcodec_decode_audio4 and libswresample
author Jan Gerber <j@xiph.org>
Thu, 6 Mar 2014 13:38:49 +0000 (14:38 +0100)
committer Jan Gerber <j@xiph.org>
Thu, 6 Mar 2014 13:42:27 +0000 (14:42 +0100)
add libavresample fallback that should work with libav

SConstruct
src/ffmpeg2theora.c
src/ffmpeg2theora.h
src/libswresample_compat.h [new file with mode: 0644]
src/theorautils.c
src/theorautils.h

index 01d234a..8569104 100644 (file)
@@ -155,7 +155,6 @@ if not env.GetOption('clean'):
       "libavutil",
   ]
   if os.path.exists("./ffmpeg"):
-    FFMPEG_LIBS.append('libswresample')
     pkg_path = list(set(map(os.path.dirname, glob('./ffmpeg/*/*.pc'))))
     pkg_path.append(os.environ.get('PKG_CONFIG_PATH', ''))
     os.environ['PKG_CONFIG_PATH'] = ':'.join(pkg_path)
@@ -168,6 +167,13 @@ if not env.GetOption('clean'):
         '-Lffmpeg/' + lib
       ])
 
+  if conf.CheckPKG('libavresample'):
+    FFMPEG_LIBS.append('libavresample')
+  else:
+    FFMPEG_LIBS.append('libswresample')
+    env.Append(CCFLAGS=[
+      '-DUSE_SWRESAMPLE'
+    ])
 
   if not conf.CheckPKG(' '.join(FFMPEG_LIBS)): 
     print """
index 410d502..9e7ffe2 100644 (file)
 #include "libswscale/swscale.h"
 #include "libpostproc/postprocess.h"
 
+#include "libavutil/opt.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/samplefmt.h"
+#include "libswresample_compat.h"
+
 #include "theora/theoraenc.h"
 #include "vorbis/codec.h"
 #include "vorbis/vorbisenc.h"
@@ -537,6 +542,11 @@ void ff2theora_output(ff2theora this) {
     int synced = this->start_time == 0.0;
     AVRational display_aspect_ratio, sample_aspect_ratio;
 
+    struct SwrContext *swr_ctx;
+    uint8_t **dst_audio_data = NULL;
+    int dst_linesize;
+    int src_nb_samples = 1024, dst_nb_samples, max_dst_nb_samples;
+
     if (this->audiostream >= 0 && this->context->nb_streams > this->audiostream) {
         AVCodecContext *enc = this->context->streams[this->audiostream]->codec;
         if (enc->codec_type == AVMEDIA_TYPE_AUDIO) {
@@ -962,22 +972,43 @@ void ff2theora_output(ff2theora this) {
         if (acodec != NULL && avcodec_open2 (aenc, acodec, NULL) >= 0) {
             if (this->sample_rate != sample_rate
                 || this->channels != aenc->channels
-                || aenc->sample_fmt != AV_SAMPLE_FMT_S16) {
-                // values take from libavcodec/resample.c
-                this->audio_resample_ctx = av_audio_resample_init(this->channels,    aenc->channels,
-                                                                  this->sample_rate, sample_rate,
-                                                                  AV_SAMPLE_FMT_S16,    aenc->sample_fmt,
-                                                                  16, 10, 0, 0.8);
-                if (!this->audio_resample_ctx) {
-                    this->channels = aenc->channels;
+                || aenc->sample_fmt != AV_SAMPLE_FMT_FLTP) {
+                swr_ctx = swr_alloc();
+                /* set options */
+                if (aenc->channel_layout) {
+                    av_opt_set_int(swr_ctx, "in_channel_layout",    aenc->channel_layout, 0);
+                } else {
+                    av_opt_set_int(swr_ctx, "in_channel_layout", av_get_default_channel_layout(aenc->channels), 0);
+                }
+                av_opt_set_int(swr_ctx, "in_sample_rate",       aenc->sample_rate, 0);
+                av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", aenc->sample_fmt, 0);
+
+                av_opt_set_int(swr_ctx, "out_channel_layout", av_get_default_channel_layout(this->channels), 0);
+                av_opt_set_int(swr_ctx, "out_sample_rate",       this->sample_rate, 0);
+                av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
+
+                /* initialize the resampling context */
+                if (swr_init(swr_ctx) < 0) {
+                    fprintf(stderr, "Failed to initialize the resampling context\n");
+                    exit(1);
                 }
+
+                max_dst_nb_samples = dst_nb_samples =
+                    av_rescale_rnd(src_nb_samples, this->sample_rate, sample_rate, AV_ROUND_UP);
+
+                if (av_samples_alloc_array_and_samples(&dst_audio_data, &dst_linesize, this->channels,
+                                                         dst_nb_samples, AV_SAMPLE_FMT_FLTP, 0) < 0) {
+                    fprintf(stderr, "Could not allocate destination samples\n");
+                    exit(1);
+                }
+
                 if (!info.frontend && this->sample_rate!=sample_rate)
                     fprintf(stderr, "  Resample: %dHz => %dHz\n", sample_rate,this->sample_rate);
                 if (!info.frontend && this->channels!=aenc->channels)
                     fprintf(stderr, "  Channels: %d => %d\n",aenc->channels,this->channels);
             }
             else{
-                this->audio_resample_ctx=NULL;
+                swr_ctx = NULL;
             }
         }
         else{
@@ -1068,13 +1099,12 @@ void ff2theora_output(ff2theora this) {
         AVPacket pkt;
         AVPacket avpkt;
         int len1;
-        int got_picture;
+        int got_frame;
         int first = 1;
         int audio_eos = 0, video_eos = 0, audio_done = 0, video_done = 0;
         int ret;
-        int16_t *audio_buf=av_malloc(4*MAX_AUDIO_FRAME_SIZE);
-        int16_t *resampled=av_malloc(4*MAX_AUDIO_FRAME_SIZE);
-        int16_t *audio_p=NULL;
+        AVFrame *audio_frame = NULL;
+        uint8_t **audio_p = NULL;
         int no_frames;
         int no_samples;
 
@@ -1370,7 +1400,7 @@ void ff2theora_output(ff2theora this) {
                       first frame decodec in case its not a keyframe
                     */
                     if (pkt.stream_index == this->video_index) {
-                      avcodec_decode_video2(venc, frame, &got_picture, &pkt);
+                      avcodec_decode_video2(venc, frame, &got_frame, &pkt);
                     }
                     av_free_packet (&pkt);
                     continue;
@@ -1389,9 +1419,9 @@ void ff2theora_output(ff2theora this) {
                 while(video_eos || avpkt.size > 0) {
                     int dups = 0;
                     static th_ycbcr_buffer ycbcr;
-                    len1 = avcodec_decode_video2(venc, frame, &got_picture, &avpkt);
+                    len1 = avcodec_decode_video2(venc, frame, &got_frame, &avpkt);
                     if (len1>=0) {
-                        if (got_picture) {
+                        if (got_frame) {
                             // this is disabled by default since it does not work
                             // for all input formats the way it should.
                             if (this->sync == 1 && pkt.dts != AV_NOPTS_VALUE) {
@@ -1428,7 +1458,7 @@ void ff2theora_output(ff2theora this) {
 
                             if (venc_pix_fmt != this->pix_fmt) {
                                 sws_scale(this->sws_colorspace_ctx,
-                                frame->data, frame->linesize, 0, display_height,
+                                (const uint8_t * const*)frame->data, frame->linesize, 0, display_height,
                                 output_tmp->data, output_tmp->linesize);
                             }
                             else{
@@ -1472,7 +1502,7 @@ void ff2theora_output(ff2theora this) {
                             }
                             if (this->sws_scale_ctx) {
                                 sws_scale(this->sws_scale_ctx,
-                                    output_cropped->data,
+                                    (const uint8_t * const*)output_cropped->data,
                                     output_cropped->linesize, 0,
                                     display_height - (this->frame_topBand + this->frame_bottomBand),
                                     output_resized->data,
@@ -1500,7 +1530,7 @@ void ff2theora_output(ff2theora this) {
                     //now output_resized
 
                     if (!first) {
-                        if (got_picture || video_eos) {
+                        if (got_frame || video_eos) {
                             prepare_ycbcr_buffer(this, ycbcr, output_buffered);
                             if(dups>0) {
                                 //this only works if dups < keyint,
@@ -1520,11 +1550,11 @@ void ff2theora_output(ff2theora this) {
                                 info.videotime = this->frame_count / av_q2d(this->framerate);
                         }
                     }
-                    if (got_picture) {
+                    if (got_frame) {
                         first=0;
                         av_picture_copy((AVPicture *)output_buffered, (AVPicture *)output_padded, this->pix_fmt, this->frame_width, this->frame_height);
                     }
-                    if (!got_picture) {
+                    if (!got_frame) {
                         break;
                     }
                 }
@@ -1532,42 +1562,62 @@ void ff2theora_output(ff2theora this) {
             if (info.passno!=1)
               if ((audio_eos && !audio_done) || (ret >= 0 && pkt.stream_index == this->audio_index)) {
                 while((audio_eos && !audio_done) || avpkt.size > 0 ) {
-                    int samples=0;
-                    int samples_out=0;
-                    int data_size = 4*MAX_AUDIO_FRAME_SIZE;
                     int bytes_per_sample = av_get_bytes_per_sample(aenc->sample_fmt);
 
                     if (avpkt.size > 0) {
-                        len1 = avcodec_decode_audio3(astream->codec, audio_buf, &data_size, &avpkt);
+                        if (!audio_frame && !(audio_frame = avcodec_alloc_frame())) {
+                            fprintf(stderr, "Failed to allocate memory\n");
+                            exit(1);
+                        }
+                        len1 = avcodec_decode_audio4(astream->codec, audio_frame, &got_frame, &avpkt);
                         if (len1 < 0) {
                             /* if error, we skip the frame */
                             break;
                         }
-                        avpkt.size -= len1;
-                        avpkt.data += len1;
-                        if (data_size >0) {
-                            samples = data_size / (aenc->channels * bytes_per_sample);
-                            samples_out = samples;
-                            if (this->audio_resample_ctx) {
-                                samples_out = audio_resample(this->audio_resample_ctx, resampled, audio_buf, samples);
-                                audio_p = resampled;
+                        /* Some audio decoders decode only part of the packet, and have to be
+                         * called again with the remainder of the packet data.
+                         * Sample: http://fate-suite.libav.org/lossless-audio/luckynight-partial.shn
+                         * Also, some decoders might over-read the packet. */
+                        len1 = FFMIN(len1, avpkt.size);
+                        if (got_frame) {
+                            dst_nb_samples = audio_frame->nb_samples;
+                            if (swr_ctx) {
+                                dst_nb_samples = av_rescale_rnd(audio_frame->nb_samples,
+                                    this->sample_rate, aenc->sample_rate, AV_ROUND_UP);
+                                if (dst_nb_samples > max_dst_nb_samples) {
+                                    av_free(dst_audio_data[0]);
+                                    if (av_samples_alloc(dst_audio_data, &dst_linesize, this->channels,
+                                                           dst_nb_samples, AV_SAMPLE_FMT_FLTP, 1) < 0) {
+                                        fprintf(stderr, "Error while converting audio\n");
+                                        exit(1);
+                                    }
+                                    max_dst_nb_samples = dst_nb_samples;
+                                }
+                                if (swr_convert(swr_ctx, dst_audio_data, dst_nb_samples,
+                                    (const uint8_t**)audio_frame->extended_data, audio_frame->nb_samples) < 0) {
+                                    fprintf(stderr, "Error while converting audio\n");
+                                    exit(1);
+                                }
+                                audio_p = dst_audio_data;
+                            } else {
+                                audio_p = audio_frame->extended_data;
                             }
-                            else
-                                audio_p = audio_buf;
                         }
+                        avpkt.size -= len1;
+                        avpkt.data += len1;
                     }
-
-                    if (no_samples > 0 && this->sample_count + samples_out > no_samples) {
-                        audio_eos = 1;
-                        samples_out = no_samples - this->sample_count;
-                        if (samples_out <= 0) {
-                            break;
+                    if(got_frame || audio_eos) {
+                        if (no_samples > 0 && this->sample_count + dst_nb_samples > no_samples) {
+                            audio_eos = 1;
+                            dst_nb_samples = no_samples - this->sample_count;
+                            if (dst_nb_samples <= 0) {
+                                break;
+                            }
                         }
+                        oggmux_add_audio(&info, audio_p, dst_nb_samples, audio_eos);
+                        avcodec_free_frame(&audio_frame);
+                        this->sample_count += dst_nb_samples;
                     }
-
-                    oggmux_add_audio(&info, audio_p,
-                        samples_out * (this->channels), samples_out, audio_eos);
-                    this->sample_count += samples_out;
                     if(audio_eos) {
                         audio_done = 1;
                     }
@@ -1752,8 +1802,8 @@ void ff2theora_output(ff2theora this) {
             avcodec_close(venc);
         }
         if (this->audio_index >= 0) {
-            if (this->audio_resample_ctx)
-                audio_resample_close(this->audio_resample_ctx);
+            if (swr_ctx)
+                swr_free(&swr_ctx);
             avcodec_close(aenc);
         }
 
@@ -1774,8 +1824,12 @@ void ff2theora_output(ff2theora this) {
             frame_dealloc(output_cropped_p);
             frame_dealloc(output_padded_p);
         }
-        av_free(audio_buf);
-        av_free(resampled);
+        if (dst_audio_data)
+            av_freep(&dst_audio_data[0]);
+        av_freep(&dst_audio_data);
+        if(swr_ctx) {
+            swr_close(swr_ctx);
+        }
     }
     else{
         fprintf(stderr, "No video or audio stream found.\n");
index ddb3c56..82c75ae 100644 (file)
@@ -62,7 +62,6 @@ typedef struct ff2theora{
     double fps;
     struct SwsContext *sws_colorspace_ctx; /* for image resampling/resizing */
     struct SwsContext *sws_scale_ctx; /* for image resampling/resizing */
-    ReSampleContext *audio_resample_ctx;
     ogg_int32_t aspect_numerator;
     ogg_int32_t aspect_denominator;
     int colorspace;
diff --git a/src/libswresample_compat.h b/src/libswresample_compat.h
new file mode 100644 (file)
index 0000000..fe23292
--- /dev/null
@@ -0,0 +1,23 @@
+// Compatibility shim: lets callers use the libswresample (FFmpeg) API names
+// whether the build selects libswresample or libavresample (Libav).
+#ifndef LIBSWRESAMPLE_COMPAT_H
+#define LIBSWRESAMPLE_COMPAT_H
+
+#ifdef USE_SWRESAMPLE
+    #include <libswresample/swresample.h>
+    // No-op stand-in for avresample_close(). A macro (not a function body) so
+    // this header stays safe to include from several translation units.
+    #define swr_close(ctx) ((void)(ctx))
+#else
+    #include <libavresample/avresample.h>
+    // Map the swr_* names used by the callers onto their avresample_* peers.
+    #define SwrContext AVAudioResampleContext
+    #define swr_init(ctx) avresample_open(ctx)
+    #define swr_close(ctx) avresample_close(ctx)
+    #define swr_free(ctx) avresample_free(ctx)
+    #define swr_alloc() avresample_alloc_context()
+    #define swr_get_delay(ctx, ...) avresample_get_delay(ctx)
+    #define swr_convert(ctx, out, out_count, in, in_count) \
+       avresample_convert(ctx, out, 0, out_count, (uint8_t **)(in), 0, in_count)
+#endif
+#endif
index 1307704..52995bd 100644 (file)
@@ -1219,17 +1219,16 @@ vorbis_time(vorbis_dsp_state * dsp, ogg_int64_t granulepos) {
 /**
  * adds audio samples to encoding sink
  * @param buffer pointer to buffer
- * @param bytes bytes in buffer
  * @param samples samples in buffer
  * @param e_o_s 1 indicates end of stream.
  */
-void oggmux_add_audio (oggmux_info *info, int16_t * buffer, int bytes, int samples, int e_o_s) {
+void oggmux_add_audio (oggmux_info *info, uint8_t **buffer, int samples, int e_o_s) {
     ogg_packet op;
 
     int i, j, k, count = 0;
     float **vorbis_buffer;
 
-    if (bytes <= 0 && samples <= 0) {
+    if (samples <= 0) {
         /* end of audio stream */
         if (e_o_s)
             vorbis_analysis_wrote (&info->vd, 0);
@@ -1252,7 +1251,7 @@ void oggmux_add_audio (oggmux_info *info, int16_t * buffer, int bytes, int sampl
                         default: k = j;
                     }
                 }
-                vorbis_buffer[k][i] = buffer[count++] / 32768.f;
+                vorbis_buffer[k][i] = ((const float  *)buffer[j])[i];
             }
         }
         vorbis_analysis_wrote (&info->vd, samples);
@@ -1291,8 +1290,8 @@ void oggmux_add_audio (oggmux_info *info, int16_t * buffer, int bytes, int sampl
                 if (op.packetno != 4) {
                     /* We only expect negative start granule in the first content
                        packet, not any of the others... */
-                    fprintf(stderr, "WARNING: vorbis packet %lld has calculated start"
-                            " granule of %lld, but it should be non-negative!",
+                    fprintf(stderr, "WARNING: vorbis packet %" PRId64 " has calculated start"
+                            " granule of %" PRId64 ", but it should be non-negative!",
                             op.packetno, start_granule);
                 }
                 start_granule = 0;
@@ -1302,7 +1301,7 @@ void oggmux_add_audio (oggmux_info *info, int16_t * buffer, int bytes, int sampl
                    allowed by the specification in the last packet only, and the
                    trailing samples should be discarded and not played/indexed. */
                 if (!op.e_o_s) {
-                    fprintf(stderr, "WARNING: vorbis packet %lld (granulepos %lld) starts before"
+                    fprintf(stderr, "WARNING: vorbis packet %" PRId64 " (granulepos %" PRId64 ") starts before"
                             " the end of the preceeding packet!", op.packetno, op.granulepos);
                 }
                 start_granule = info->vorbis_granulepos;
index 04b6675..12fb9d2 100644 (file)
@@ -168,7 +168,7 @@ void init_info(oggmux_info *info);
 extern void oggmux_setup_kate_streams(oggmux_info *info, int n_kate_streams);
 extern void oggmux_init (oggmux_info *info);
 extern void oggmux_add_video (oggmux_info *info, th_ycbcr_buffer ycbcr, int e_o_s);
-extern void oggmux_add_audio (oggmux_info *info, int16_t * readbuffer, int bytesread, int samplesread,int e_o_s);
+extern void oggmux_add_audio (oggmux_info *info, uint8_t **buffer, int samples,int e_o_s);
 #ifdef HAVE_KATE
 extern void oggmux_add_kate_text (oggmux_info *info, int idx, double t0, double t1, const char *text, size_t len, int x1, int x2, int y1, int y2);
 extern void oggmux_add_kate_image (oggmux_info *info, int idx, double t0, double t1, const kate_region *kr, const kate_palette *kp, const kate_bitmap *kb);