[theora.git] / examples / encoder_example.c
1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10  *                                                                  *
11  ********************************************************************
12
13   function: example encoder application; makes an Ogg Theora/Vorbis
14             file from YUV4MPEG2 and WAV input
15   last mod: $Id$
16
17  ********************************************************************/
18
19 #if !defined(_REENTRANT)
20 #define _REENTRANT
21 #endif
22 #if !defined(_GNU_SOURCE)
23 #define _GNU_SOURCE
24 #endif
25 #if !defined(_LARGEFILE_SOURCE)
26 #define _LARGEFILE_SOURCE
27 #endif
28 #if !defined(_LARGEFILE64_SOURCE)
29 #define _LARGEFILE64_SOURCE
30 #endif
31 #if !defined(_FILE_OFFSET_BITS)
32 #define _FILE_OFFSET_BITS 64
33 #endif
34 /*#define OC_COLLECT_METRICS*/
35
36 #include <stdio.h>
37 #if !defined(_WIN32)
38 #include <getopt.h>
39 #include <unistd.h>
40 #else
41 #include "getopt.h"
42 #endif
43 #include <stdlib.h>
44 #include <string.h>
45 #include <time.h>
46 #include <math.h>
47 #include "theora/theoraenc.h"
48 #include "vorbis/codec.h"
49 #include "vorbis/vorbisenc.h"
50
51 #ifdef _WIN32
52 /*supply missing headers and functions to Win32. going to hell, I know*/
53 #include <fcntl.h>
54 #include <io.h>
55
56 static double rint(double x)
57 {
58   if (x < 0.0)
59     return (double)(int)(x - 0.5);
60   else
61     return (double)(int)(x + 0.5);
62 }
63 #endif
64
65 #if defined(OC_COLLECT_METRICS)
66 # define TH_ENCCTL_SET_METRICS_FILE (0x8000)
67 #endif
68
69 const char *optstring = "b:e:o:a:A:v:V:s:S:f:F:ck:d:z:\1\2\3\4"
70 #if defined(OC_COLLECT_METRICS)
71  "m:"
72 #endif
73  ;
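/*The long-only options below have no short letter; getopt_long() returns the
   control codes '\1'-'\4' given as their val fields, so they share the same
   option switch in main() as the ordinary short options.*/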
74 struct option options [] = {
75   {"begin-time",required_argument,NULL,'b'},
76   {"end-time",required_argument,NULL,'e'},
77   {"output",required_argument,NULL,'o'},
78   {"audio-rate-target",required_argument,NULL,'A'},
79   {"video-rate-target",required_argument,NULL,'V'},
80   {"audio-quality",required_argument,NULL,'a'},
81   {"video-quality",required_argument,NULL,'v'},
82   {"aspect-numerator",required_argument,NULL,'s'},
83   {"aspect-denominator",required_argument,NULL,'S'},
84   {"framerate-numerator",required_argument,NULL,'f'},
85   {"framerate-denominator",required_argument,NULL,'F'},
86   {"vp3-compatible",no_argument,NULL,'c'},
87   {"speed",required_argument,NULL,'z'},
88   {"soft-target",no_argument,NULL,'\1'},
89   {"keyframe-freq",required_argument,NULL,'k'},
90   {"buf-delay",required_argument,NULL,'d'},
91   {"two-pass",no_argument,NULL,'\2'},
92   {"first-pass",required_argument,NULL,'\3'},
93   {"second-pass",required_argument,NULL,'\4'},
94 #if defined(OC_COLLECT_METRICS)
95   {"metrics-file",required_argument,NULL,'m'},
96 #endif
97   {NULL,0,NULL,0}
98 };
99
100 /* You'll go to Hell for using globals. */
101
102 FILE *audio=NULL;
103 FILE *video=NULL;
104
105 int audio_ch=0;
106 int audio_hz=0;
107
108 float audio_q=.1f;
109 int audio_r=-1;
110 int vp3_compatible=0;
111
112 int frame_w=0;
113 int frame_h=0;
114 int pic_w=0;
115 int pic_h=0;
116 int pic_x=0;
117 int pic_y=0;
118 int video_fps_n=-1;
119 int video_fps_d=-1;
120 int video_par_n=-1;
121 int video_par_d=-1;
122 char interlace;
123 int src_c_dec_h=2;
124 int src_c_dec_v=2;
125 int dst_c_dec_h=2;
126 int dst_c_dec_v=2;
127 char chroma_type[16];
128
129 /*The size of each converted frame buffer.*/
130 size_t y4m_dst_buf_sz;
131 /*The amount to read directly into the converted frame buffer.*/
132 size_t y4m_dst_buf_read_sz;
133 /*The size of the auxiliary buffer.*/
134 size_t y4m_aux_buf_sz;
135 /*The amount to read into the auxiliary buffer.*/
136 size_t y4m_aux_buf_read_sz;
137
138 /*The function used to perform chroma conversion.*/
139 typedef void (*y4m_convert_func)(unsigned char *_dst,unsigned char *_aux);
140
141 y4m_convert_func y4m_convert=NULL;
142
143 int video_r=-1;
144 int video_q=-1;
145 ogg_uint32_t keyframe_frequency=0;
146 int buf_delay=-1;
147
148 long begin_sec=-1;
149 long begin_usec=0;
150 long end_sec=-1;
151 long end_usec=0;
152
153 static void usage(void){
154   fprintf(stderr,
155           "Usage: encoder_example [options] [audio_file] video_file\n\n"
156           "Options: \n\n"
157           "  -o --output <filename.ogv>      file name for encoded output;\n"
158           "                                  If this option is not given, the\n"
159           "                                  compressed data is sent to stdout.\n\n"
160           "  -A --audio-rate-target <n>      bitrate target for Vorbis audio;\n"
161           "                                  use -a and not -A if at all possible,\n"
162           "                                  as -a gives higher quality for a given\n"
163           "                                  bitrate.\n\n"
164           "  -V --video-rate-target <n>      bitrate target for Theora video\n\n"
165           "     --soft-target                Use a large reservoir and treat the rate\n"
166           "                                  as a soft target; rate control is less\n"
167           "                                  strict but resulting quality is usually\n"
168           "                                  higher/smoother overall. Soft target also\n"
169           "                                  allows an optional -v setting to specify\n"
170           "                                  a minimum allowed quality.\n\n"
171           "     --two-pass                   Compress input using two-pass rate control\n"
172           "                                  This option requires that the input to the\n"
173           "                                  encoder is seekable, and performs\n"
174           "                                  both passes automatically.\n\n"
175           "     --first-pass <filename>      Perform first-pass of a two-pass rate\n"
176           "                                  controlled encoding, saving pass data to\n"
177           "                                  <filename> for a later second pass\n\n"
178           "     --second-pass <filename>     Perform second-pass of a two-pass rate\n"
179           "                                  controlled encoding, reading first-pass\n"
180           "                                  data from <filename>.  The first pass\n"
181           "                                  data must come from a first encoding pass\n"
182           "                                  using identical input video to work\n"
183           "                                  properly.\n\n"
184           "  -a --audio-quality <n>          Vorbis quality selector from -1 to 10\n"
185           "                                  (-1 yields smallest files but lowest\n"
186           "                                  fidelity; 10 yields highest fidelity\n"
187           "                                  but large files. '2' is a reasonable\n"
188           "                                  default).\n\n"
189           "   -v --video-quality <n>         Theora quality selector from 0 to 10\n"
190           "                                  (0 yields smallest files but lowest\n"
191           "                                  video quality. 10 yields highest\n"
192           "                                  fidelity but large files).\n\n"
193           "   -s --aspect-numerator <n>      Aspect ratio numerator, default is 0\n"
194           "                                  or extracted from YUV input file\n"
195           "   -S --aspect-denominator <n>    Aspect ratio denominator, default is 0\n"
196           "                                  or extracted from YUV input file\n"
197           "   -f --framerate-numerator <n>   Frame rate numerator, can be extracted\n"
198           "                                  from YUV input file. ex: 30000000\n"
199           "   -F --framerate-denominator <n> Frame rate denominator, can be extracted\n"
200           "                                  from YUV input file. ex: 1000000\n"
201           "                                  The frame rate numerator divided by this\n"
202           "                                  determines the frame rate in frames per second\n"
203           "   -k --keyframe-freq <n>         Keyframe frequency\n"
204           "   -z --speed <n>                 Sets the encoder speed level. Higher speed\n"
205           "                                  levels favor quicker encoding over better\n"
206           "                                  quality per bit. Depending on the encoding\n"
207           "                                  mode, and the internal algorithms used,\n"
208           "                                  quality may actually improve with higher\n"
209           "                                  speeds, but in this case bitrate will also\n"
210           "                                  likely increase. The maximum value, and the\n"
211           "                                  meaning of each value, are implementation-\n"
212           "                                  specific and may change depending on the\n"
213           "                                  current encoding mode (rate constrained,\n"
214           "                                  two-pass, etc.).\n"
215           "   -d --buf-delay <n>             Buffer delay (in frames). Longer delays\n"
216           "                                  allow smoother rate adaptation and provide\n"
217           "                                  better overall quality, but require more\n"
218           "                                  client side buffering and add latency. The\n"
219           "                                  default value is the keyframe interval for\n"
220           "                                  one-pass encoding (or somewhat larger if\n"
221           "                                  --soft-target is used) and infinite for\n"
222           "                                  two-pass encoding.\n"
223           "   -b --begin-time <h:m:s.d>      Begin encoding at offset into input\n"
224           "   -e --end-time <h:m:s.d>        End encoding at offset into input\n"
225 #if defined(OC_COLLECT_METRICS)
226           "   -m --metrics-file <filename>   File in which to accumulate mode decision\n"
227           "                                  metrics. Statistics from the current\n"
228           "                                  encode will be merged with those already\n"
229           "                                  in the file if it exists.\n"
230 #endif
231           "encoder_example accepts only uncompressed RIFF WAV format audio and\n"
232           "YUV4MPEG2 uncompressed video.\n\n");
233   exit(1);
234 }
235
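/*Example (a made-up but typical stream): for a header line of
   "YUV4MPEG2 W640 H480 F30000:1001 Ip A1:1 C420mpeg2"
  id_file() passes " W640 H480 F30000:1001 Ip A1:1 C420mpeg2" as _tags, and the
   parser below fills in the picture size, frame rate, interlacing flag, pixel
   aspect ratio and chroma type from the W/H/F/I/A/C tags.*/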
236 static int y4m_parse_tags(char *_tags){
237   int   got_w;
238   int   got_h;
239   int   got_fps;
240   int   got_interlace;
241   int   got_par;
242   int   got_chroma;
243   int   tmp_video_fps_n;
244   int   tmp_video_fps_d;
245   int   tmp_video_par_n;
246   int   tmp_video_par_d;
247   char *p;
248   char *q;
249   got_w=got_h=got_fps=got_interlace=got_par=got_chroma=0;
250   for(p=_tags;;p=q){
251     /*Skip any leading spaces.*/
252     while(*p==' ')p++;
253     /*If that's all we have, stop.*/
254     if(p[0]=='\0')break;
255     /*Find the end of this tag.*/
256     for(q=p+1;*q!='\0'&&*q!=' ';q++);
257     /*Process the tag.*/
258     switch(p[0]){
259       case 'W':{
260         if(sscanf(p+1,"%d",&pic_w)!=1)return -1;
261         got_w=1;
262       }break;
263       case 'H':{
264         if(sscanf(p+1,"%d",&pic_h)!=1)return -1;
265         got_h=1;
266       }break;
267       case 'F':{
268         if(sscanf(p+1,"%d:%d",&tmp_video_fps_n,&tmp_video_fps_d)!=2)return -1;
269         got_fps=1;
270       }break;
271       case 'I':{
272         interlace=p[1];
273         got_interlace=1;
274       }break;
275       case 'A':{
276         if(sscanf(p+1,"%d:%d",&tmp_video_par_n,&tmp_video_par_d)!=2)return -1;
277         got_par=1;
278       }break;
279       case 'C':{
280         if(q-p>16)return -1;
281         memcpy(chroma_type,p+1,q-p-1);
282         chroma_type[q-p-1]='\0';
283         got_chroma=1;
284       }break;
285       /*Ignore unknown tags.*/
286     }
287   }
288   if(!got_w||!got_h||!got_fps||!got_interlace||!got_par)return -1;
289   /*Chroma-type is not specified in older files, e.g., those generated by
290      mplayer.*/
291   if(!got_chroma)strcpy(chroma_type,"420");
292   /*Update fps and aspect ratio globals if not specified in the command line.*/
293   if(video_fps_n==-1)video_fps_n=tmp_video_fps_n;
294   if(video_fps_d==-1)video_fps_d=tmp_video_fps_d;
295   if(video_par_n==-1)video_par_n=tmp_video_par_n;
296   if(video_par_d==-1)video_par_d=tmp_video_par_d;
297   return 0;
298 }
299
300 /*All anti-aliasing filters in the following conversion functions are based on
301    one of two window functions:
302   The 6-tap Lanczos window (for down-sampling and shifts):
303    sinc(\pi*t)*sinc(\pi*t/3), |t|<3  (sinc(t)==sin(t)/t)
304    0,                         |t|>=3
305   The 4-tap Mitchell window (for up-sampling):
306    7|t|^3-12|t|^2+16/3,             |t|<1
307    -(7/3)|t|^3+12|t|^2-20|t|+32/3,  |t|<2
308    0,                               |t|>=2
309   The number of taps is intentionally kept small to reduce computational
310    overhead and limit ringing.
311
312   The taps from these filters are scaled so that their sum is 1, and the result
313    is scaled by 128 and rounded to integers to create a filter whose
314    intermediate values fit inside 16 bits.
315   Coefficients are rounded in such a way as to ensure their sum is still 128,
316    which is usually equivalent to normal rounding.*/
317
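#if 0
/*Not part of the build: an illustrative sketch (the function name is ours,
   not libtheora's) of how a quarter-pel tap set such as [4 -17 114 35 -9 1]
   can be derived from the 6-tap Lanczos window above: sample the window at
   k-1/4 for k=-2...3, normalize the samples to sum to 1, scale by 128 and
   round.*/
static void example_quarter_pel_lanczos_taps(void){
  static const double PI=3.141592653589793;
  double w[6];
  double sum;
  int    k;
  sum=0;
  for(k=-2;k<=3;k++){
    double t;
    t=k-0.25;
    /*sinc(\pi*t)*sinc(\pi*t/3); t is never 0 here, so no special case.*/
    w[k+2]=sin(PI*t)/(PI*t)*(sin(PI*t/3)/(PI*t/3));
    sum+=w[k+2];
  }
  /*Prints "4 -17 114 35 -9 1"; the rounded taps again sum to 128.*/
  for(k=0;k<6;k++)printf("%d ",(int)floor(w[k]/sum*128+0.5));
  printf("\n");
}
#endif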
318 #define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a))
319 #define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a))
320 #define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
321
322 /*420jpeg chroma samples are sited like:
323   Y-------Y-------Y-------Y-------
324   |       |       |       |
325   |   BR  |       |   BR  |
326   |       |       |       |
327   Y-------Y-------Y-------Y-------
328   |       |       |       |
329   |       |       |       |
330   |       |       |       |
331   Y-------Y-------Y-------Y-------
332   |       |       |       |
333   |   BR  |       |   BR  |
334   |       |       |       |
335   Y-------Y-------Y-------Y-------
336   |       |       |       |
337   |       |       |       |
338   |       |       |       |
339
340   420mpeg2 chroma samples are sited like:
341   Y-------Y-------Y-------Y-------
342   |       |       |       |
343   BR      |       BR      |
344   |       |       |       |
345   Y-------Y-------Y-------Y-------
346   |       |       |       |
347   |       |       |       |
348   |       |       |       |
349   Y-------Y-------Y-------Y-------
350   |       |       |       |
351   BR      |       BR      |
352   |       |       |       |
353   Y-------Y-------Y-------Y-------
354   |       |       |       |
355   |       |       |       |
356   |       |       |       |
357
358   We use a resampling filter to shift the site locations one quarter pixel (at
359    the chroma plane's resolution) to the right.
360   The 4:2:2 modes look exactly the same, except there are twice as many chroma
361    lines, and they are vertically co-sited with the luma samples in both the
362    mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
363 static void y4m_convert_42xmpeg2_42xjpeg(unsigned char *_dst,
364  unsigned char *_aux){
365   int c_w;
366   int c_h;
367   int pli;
368   int y;
369   int x;
370   /*Skip past the luma data.*/
371   _dst+=pic_w*pic_h;
372   /*Compute the size of each chroma plane.*/
373   c_w=(pic_w+dst_c_dec_h-1)/dst_c_dec_h;
374   c_h=(pic_h+dst_c_dec_v-1)/dst_c_dec_v;
375   for(pli=1;pli<3;pli++){
376     for(y=0;y<c_h;y++){
377       /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
378          window.*/
379       for(x=0;x<OC_MINI(c_w,2);x++){
380         _dst[x]=(unsigned char)OC_CLAMPI(0,4*_aux[0]-17*_aux[OC_MAXI(x-1,0)]+
381          114*_aux[x]+35*_aux[OC_MINI(x+1,c_w-1)]-9*_aux[OC_MINI(x+2,c_w-1)]+
382          _aux[OC_MINI(x+3,c_w-1)]+64>>7,255);
383       }
384       for(;x<c_w-3;x++){
385         _dst[x]=(unsigned char)OC_CLAMPI(0,4*_aux[x-2]-17*_aux[x-1]+
386          114*_aux[x]+35*_aux[x+1]-9*_aux[x+2]+_aux[x+3]+64>>7,255);
387       }
388       for(;x<c_w;x++){
389         _dst[x]=(unsigned char)OC_CLAMPI(0,4*_aux[x-2]-17*_aux[x-1]+
390          114*_aux[x]+35*_aux[OC_MINI(x+1,c_w-1)]-9*_aux[OC_MINI(x+2,c_w-1)]+
391          _aux[c_w-1]+64>>7,255);
392       }
393       _dst+=c_w;
394       _aux+=c_w;
395     }
396   }
397 }
398
399 /*This format is only used for interlaced content, but is included for
400    completeness.
401
402   420jpeg chroma samples are sited like:
403   Y-------Y-------Y-------Y-------
404   |       |       |       |
405   |   BR  |       |   BR  |
406   |       |       |       |
407   Y-------Y-------Y-------Y-------
408   |       |       |       |
409   |       |       |       |
410   |       |       |       |
411   Y-------Y-------Y-------Y-------
412   |       |       |       |
413   |   BR  |       |   BR  |
414   |       |       |       |
415   Y-------Y-------Y-------Y-------
416   |       |       |       |
417   |       |       |       |
418   |       |       |       |
419
420   420paldv chroma samples are sited like:
421   YR------Y-------YR------Y-------
422   |       |       |       |
423   |       |       |       |
424   |       |       |       |
425   YB------Y-------YB------Y-------
426   |       |       |       |
427   |       |       |       |
428   |       |       |       |
429   YR------Y-------YR------Y-------
430   |       |       |       |
431   |       |       |       |
432   |       |       |       |
433   YB------Y-------YB------Y-------
434   |       |       |       |
435   |       |       |       |
436   |       |       |       |
437
438   We use a resampling filter to shift the site locations one quarter pixel (at
439    the chroma plane's resolution) to the right.
440   Then we use another filter to move the C_r location down one quarter pixel,
441    and the C_b location up one quarter pixel.*/
442 static void y4m_convert_42xpaldv_42xjpeg(unsigned char *_dst,
443  unsigned char *_aux){
444   unsigned char *tmp;
445   int            c_w;
446   int            c_h;
447   int            c_sz;
448   int            pli;
449   int            y;
450   int            x;
451   /*Skip past the luma data.*/
452   _dst+=pic_w*pic_h;
453   /*Compute the size of each chroma plane.*/
454   c_w=(pic_w+1)/2;
455   c_h=(pic_h+dst_c_dec_h-1)/dst_c_dec_h;
456   c_sz=c_w*c_h;
457   /*First do the horizontal re-sampling.
458     This is the same as the mpeg2 case, except that after the horizontal pass,
459      we need to apply a second vertical filter.*/
460   tmp=_aux+2*c_sz;
461   for(pli=1;pli<3;pli++){
462     for(y=0;y<c_h;y++){
463       /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
464          window.*/
465       for(x=0;x<OC_MINI(c_w,2);x++){
466         tmp[x]=(unsigned char)OC_CLAMPI(0,4*_aux[0]-17*_aux[OC_MAXI(x-1,0)]+
467          114*_aux[x]+35*_aux[OC_MINI(x+1,c_w-1)]-9*_aux[OC_MINI(x+2,c_w-1)]+
468          _aux[OC_MINI(x+3,c_w-1)]+64>>7,255);
469       }
470       for(;x<c_w-3;x++){
471         tmp[x]=(unsigned char)OC_CLAMPI(0,4*_aux[x-2]-17*_aux[x-1]+
472          114*_aux[x]+35*_aux[x+1]-9*_aux[x+2]+_aux[x+3]+64>>7,255);
473       }
474       for(;x<c_w;x++){
475         tmp[x]=(unsigned char)OC_CLAMPI(0,4*_aux[x-2]-17*_aux[x-1]+
476          114*_aux[x]+35*_aux[OC_MINI(x+1,c_w-1)]-9*_aux[OC_MINI(x+2,c_w-1)]+
477          _aux[c_w-1]+64>>7,255);
478       }
479       tmp+=c_w;
480       _aux+=c_w;
481     }
482     switch(pli){
483       case 1:{
484         tmp-=c_sz;
485         /*Slide C_b up a quarter-pel.
486           This is the same filter used above, but in the other order.*/
487         for(x=0;x<c_w;x++){
488           for(y=0;y<OC_MINI(c_h,3);y++){
489             _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,tmp[0]-
490              9*tmp[OC_MAXI(y-2,0)*c_w]+35*tmp[OC_MAXI(y-1,0)*c_w]+
491              114*tmp[y*c_w]-17*tmp[OC_MINI(y+1,c_h-1)*c_w]+
492              4*tmp[OC_MINI(y+2,c_h-1)*c_w]+64>>7,255);
493           }
494           for(;y<c_h-2;y++){
495             _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,tmp[(y-3)*c_w]-
496              9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]-
497              17*tmp[(y+1)*c_w]+4*tmp[(y+2)*c_w]+64>>7,255);
498           }
499           for(;y<c_h;y++){
500             _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,tmp[(y-3)*c_w]-
501              9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]-
502              17*tmp[OC_MINI(y+1,c_h-1)*c_w]+4*tmp[(c_h-1)*c_w]+64>>7,255);
503           }
504           _dst++;
505           tmp++;
506         }
507         _dst+=c_sz-c_w;
508         tmp-=c_w;
509       }break;
510       case 2:{
511         tmp-=c_sz;
512         /*Slide C_r down a quarter-pel.
513           This is the same as the horizontal filter.*/
514         for(x=0;x<c_w;x++){
515           for(y=0;y<OC_MINI(c_h,2);y++){
516             _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,4*tmp[0]-
517              17*tmp[OC_MAXI(y-1,0)*c_w]+114*tmp[y*c_w]+
518              35*tmp[OC_MINI(y+1,c_h-1)*c_w]-9*tmp[OC_MINI(y+2,c_h-1)*c_w]+
519              tmp[OC_MINI(y+3,c_h-1)*c_w]+64>>7,255);
520           }
521           for(;y<c_h-3;y++){
522             _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,4*tmp[(y-2)*c_w]-
523              17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[(y+1)*c_w]-
524              9*tmp[(y+2)*c_w]+tmp[(y+3)*c_w]+64>>7,255);
525           }
526           for(;y<c_h;y++){
527             _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,4*tmp[(y-2)*c_w]-
528              17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[OC_MINI(y+1,c_h-1)*c_w]-
529              9*tmp[OC_MINI(y+2,c_h-1)*c_w]+tmp[(c_h-1)*c_w]+64>>7,255);
530           }
531           _dst++;
532           tmp++;
533         }
534       }break;
535     }
536     /*For actual interlaced material, this would have to be done separately on
537        each field, and the shift amounts would be different.
538       C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
539        C_b up 1/8 in the bottom field.
540       The corresponding filters would be:
541        Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
542        Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
543   }
544 }
545
546 /*422jpeg chroma samples are sited like:
547   Y---BR--Y-------Y---BR--Y-------
548   |       |       |       |
549   |       |       |       |
550   |       |       |       |
551   Y---BR--Y-------Y---BR--Y-------
552   |       |       |       |
553   |       |       |       |
554   |       |       |       |
555   Y---BR--Y-------Y---BR--Y-------
556   |       |       |       |
557   |       |       |       |
558   |       |       |       |
559   Y---BR--Y-------Y---BR--Y-------
560   |       |       |       |
561   |       |       |       |
562   |       |       |       |
563
564   411 chroma samples are sited like:
565   YBR-----Y-------Y-------Y-------
566   |       |       |       |
567   |       |       |       |
568   |       |       |       |
569   YBR-----Y-------Y-------Y-------
570   |       |       |       |
571   |       |       |       |
572   |       |       |       |
573   YBR-----Y-------Y-------Y-------
574   |       |       |       |
575   |       |       |       |
576   |       |       |       |
577   YBR-----Y-------Y-------Y-------
578   |       |       |       |
579   |       |       |       |
580   |       |       |       |
581
582   We use a filter to resample at site locations one eighth pixel (at the source
583    chroma plane's horizontal resolution) and five eighths of a pixel to the
584    right.*/
585 static void y4m_convert_411_422jpeg(unsigned char *_dst,
586  unsigned char *_aux){
587   int c_w;
588   int dst_c_w;
589   int c_h;
590   int pli;
591   int y;
592   int x;
593   /*Skip past the luma data.*/
594   _dst+=pic_w*pic_h;
595   /*Compute the size of each chroma plane.*/
596   c_w=(pic_w+src_c_dec_h-1)/src_c_dec_h;
597   dst_c_w=(pic_w+dst_c_dec_h-1)/dst_c_dec_h;
598   c_h=(pic_h+dst_c_dec_v-1)/dst_c_dec_v;
599   for(pli=1;pli<3;pli++){
600     for(y=0;y<c_h;y++){
601       /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
602          4-tap Mitchell window.*/
603       for(x=0;x<OC_MINI(c_w,1);x++){
604         _dst[x<<1]=(unsigned char)OC_CLAMPI(0,111*_aux[0]+
605          18*_aux[OC_MINI(1,c_w-1)]-_aux[OC_MINI(2,c_w-1)]+64>>7,255);
606         _dst[x<<1|1]=(unsigned char)OC_CLAMPI(0,47*_aux[0]+
607          86*_aux[OC_MINI(1,c_w-1)]-5*_aux[OC_MINI(2,c_w-1)]+64>>7,255);
608       }
609       for(;x<c_w-2;x++){
610         _dst[x<<1]=(unsigned char)OC_CLAMPI(0,_aux[x-1]+110*_aux[x]+
611          18*_aux[x+1]-_aux[x+2]+64>>7,255);
612         _dst[x<<1|1]=(unsigned char)OC_CLAMPI(0,-3*_aux[x-1]+50*_aux[x]+
613          86*_aux[x+1]-5*_aux[x+2]+64>>7,255);
614       }
615       for(;x<c_w;x++){
616         _dst[x<<1]=(unsigned char)OC_CLAMPI(0,_aux[x-1]+110*_aux[x]+
617          18*_aux[OC_MINI(x+1,c_w-1)]-_aux[c_w-1]+64>>7,255);
618         if((x<<1|1)<dst_c_w){
619           _dst[x<<1|1]=(unsigned char)OC_CLAMPI(0,-3*_aux[x-1]+50*_aux[x]+
620            86*_aux[OC_MINI(x+1,c_w-1)]-5*_aux[c_w-1]+64>>7,255);
621         }
622       }
623       _dst+=dst_c_w;
624       _aux+=c_w;
625     }
626   }
627 }
628
629 /*The image is padded with empty chroma components at 4:2:0.
630   This costs about 17 bits a frame to code.*/
631 static void y4m_convert_mono_420jpeg(unsigned char *_dst,
632  unsigned char *_aux){
633   int c_sz;
634   _dst+=pic_w*pic_h;
635   c_sz=((pic_w+dst_c_dec_h-1)/dst_c_dec_h)*((pic_h+dst_c_dec_v-1)/dst_c_dec_v);
636   memset(_dst,128,c_sz*2);
637 }
638
639 #if 0
640 /*Right now just 444 to 420.
641   Not too hard to generalize.*/
642 static void y4m_convert_4xxjpeg_42xjpeg(unsigned char *_dst,
643  unsigned char *_aux){
644   unsigned char *tmp;
645   int            c_w;
646   int            c_h;
647   int            pic_sz;
648   int            tmp_sz;
649   int            c_sz;
650   int            pli;
651   int            y;
652   int            x;
653   /*Compute the size of each chroma plane.*/
654   c_w=(pic_w+dst_c_dec_h-1)/dst_c_dec_h;
655   c_h=(pic_h+dst_c_dec_v-1)/dst_c_dec_v;
656   pic_sz=pic_w*pic_h;
657   tmp_sz=c_w*pic_h;
658   c_sz=c_w*c_h;
659   _dst+=pic_sz;
660   for(pli=1;pli<3;pli++){
661     tmp=_aux+pic_sz;
662     /*In reality, the horizontal and vertical steps could be pipelined, for
663        less memory consumption and better cache performance, but we do them
664        separately for simplicity.*/
665     /*First do horizontal filtering (convert to 4:2:2)*/
666     /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
667     for(y=0;y<pic_h;y++){
668       for(x=0;x<OC_MINI(pic_w,2);x+=2){
669         tmp[x>>1]=OC_CLAMPI(0,64*_aux[0]+78*_aux[OC_MINI(1,pic_w-1)]-
670          17*_aux[OC_MINI(2,pic_w-1)]+3*_aux[OC_MINI(3,pic_w-1)]+64>>7,255);
671       }
672       for(;x<pic_w-3;x+=2){
673         tmp[x>>1]=OC_CLAMPI(0,3*(_aux[x-2]+_aux[x+3])-17*(_aux[x-1]+_aux[x+2])+
674          78*(_aux[x]+_aux[x+1])+64>>7,255);
675       }
676       for(;x<pic_w;x+=2){
677         tmp[x>>1]=OC_CLAMPI(0,3*(_aux[x-2]+_aux[pic_w-1])-
678          17*(_aux[x-1]+_aux[OC_MINI(x+2,pic_w-1)])+
679          78*(_aux[x]+_aux[OC_MINI(x+1,pic_w-1)])+64>>7,255);
680       }
681       tmp+=c_w;
682       _aux+=pic_w;
683     }
684     _aux-=pic_sz;
685     tmp-=tmp_sz;
686     /*Now do the vertical filtering.*/
687     for(x=0;x<c_w;x++){
688       for(y=0;y<OC_MINI(pic_h,2);y+=2){
689         _dst[(y>>1)*c_w]=OC_CLAMPI(0,64*tmp[0]+78*tmp[OC_MINI(1,pic_h-1)*c_w]-
690          17*tmp[OC_MINI(2,pic_h-1)*c_w]+3*tmp[OC_MINI(3,pic_h-1)*c_w]+
691          64>>7,255);
692       }
693       for(;y<pic_h-3;y+=2){
694         _dst[(y>>1)*c_w]=OC_CLAMPI(0,3*(tmp[(y-2)*c_w]+tmp[(y+3)*c_w])-
695          17*(tmp[(y-1)*c_w]+tmp[(y+2)*c_w])+78*(tmp[y*c_w]+tmp[(y+1)*c_w])+
696          64>>7,255);
697       }
698       for(;y<pic_h;y+=2){
699         _dst[(y>>1)*c_w]=OC_CLAMPI(0,3*(tmp[(y-2)*c_w]+tmp[(pic_h-1)*c_w])-
700          17*(tmp[(y-1)*c_w]+tmp[OC_MINI(y+2,pic_h-1)*c_w])+
701          78*(tmp[y*c_w]+tmp[OC_MINI(y+1,pic_h-1)*c_w])+64>>7,255);
702       }
703       tmp++;
704       _dst++;
705     }
706     _dst-=c_w;
707   }
708 }
709 #endif
710
711
712 /*No conversion function needed.*/
713 static void y4m_convert_null(unsigned char *_dst,
714  unsigned char *_aux){
715 }
716
717 static void id_file(char *f){
718   FILE *test;
719   unsigned char buffer[80];
720   int ret;
721
722   /* open it, look for magic */
723
724   if(!strcmp(f,"-")){
725     /* stdin */
726     test=stdin;
727   }else{
728     test=fopen(f,"rb");
729     if(!test){
730       fprintf(stderr,"Unable to open file %s.\n",f);
731       exit(1);
732     }
733   }
734
735   ret=fread(buffer,1,4,test);
736   if(ret<4){
737     fprintf(stderr,"EOF determining file type of file %s.\n",f);
738     exit(1);
739   }
740
741   if(!memcmp(buffer,"RIFF",4)){
742     /* possible WAV file */
743
744     if(audio){
745       /* umm, we already have one */
746       fprintf(stderr,"Multiple RIFF WAVE files specified on command line.\n");
747       exit(1);
748     }
749
750     /* Parse the rest of the header */
751
752     ret=fread(buffer,1,8,test);
753     if(ret<8)goto riff_err;
754     if(!memcmp(buffer+4,"WAVE",4)){
755
756       while(!feof(test)){
757         ret=fread(buffer,1,4,test);
758         if(ret<4)goto riff_err;
759         if(!memcmp("fmt",buffer,3)){
760
761           /* OK, this is our audio specs chunk.  Slurp it up. */
762
763           ret=fread(buffer,1,20,test);
764           if(ret<20)goto riff_err;
765
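          /*The 20 bytes just read are the chunk size (bytes 0-3) followed by
             the start of the fmt payload: compression code (4-5), channel
             count (6-7), sample rate (8-11), byte rate (12-15), block align
             (16-17) and bits per sample (18-19), which is what the checks
             below index.*/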
766           if(memcmp(buffer+4,"\001\000",2)){
767             fprintf(stderr,"The WAV file %s is in a compressed format; "
768                     "can't read it.\n",f);
769             exit(1);
770           }
771
772           audio=test;
773           audio_ch=buffer[6]+(buffer[7]<<8);
774           audio_hz=buffer[8]+(buffer[9]<<8)+
775             (buffer[10]<<16)+(buffer[11]<<24);
776
777           if(buffer[18]+(buffer[19]<<8)!=16){
778             fprintf(stderr,"Can only read 16 bit WAV files for now.\n");
779             exit(1);
780           }
781
782           /* Now, align things to the beginning of the data */
783           /* Look for 'dataxxxx' */
784           while(!feof(test)){
785             ret=fread(buffer,1,4,test);
786             if(ret<4)goto riff_err;
787             if(!memcmp("data",buffer,4)){
788               /* We're there.  Ignore the declared size for now. */
789               ret=fread(buffer,1,4,test);
790               if(ret<4)goto riff_err;
791
792               fprintf(stderr,"File %s is 16 bit %d channel %d Hz RIFF WAV audio.\n",
793                       f,audio_ch,audio_hz);
794
795               return;
796             }
797           }
798         }
799       }
800     }
801
802     fprintf(stderr,"Couldn't find WAVE data in RIFF file %s.\n",f);
803     exit(1);
804
805   }
806   if(!memcmp(buffer,"YUV4",4)){
807     /* possible YUV4MPEG2 format file */
808     /* read until newline, or 80 cols, whichever happens first */
809     int i;
810     for(i=0;i<79;i++){
811       ret=fread(buffer+i,1,1,test);
812       if(ret<1)goto yuv_err;
813       if(buffer[i]=='\n')break;
814     }
815     if(i==79){
816       fprintf(stderr,"Error parsing %s header; not a YUV4MPEG2 file?\n",f);
817     }
818     buffer[i]='\0';
819
820     if(!memcmp(buffer,"MPEG",4)){
821
822       if(video){
823         /* umm, we already have one */
824         fprintf(stderr,"Multiple video files specified on command line.\n");
825         exit(1);
826       }
827
828       if(buffer[4]!='2'){
829         fprintf(stderr,"Incorrect YUV input file version; YUV4MPEG2 required.\n");
830       }
831
832       ret=y4m_parse_tags((char *)buffer+5);
833       if(ret<0){
834         fprintf(stderr,"Error parsing YUV4MPEG2 header in file %s.\n",f);
835         exit(1);
836       }
837
838       if(interlace!='p'){
839         fprintf(stderr,"Input video is interlaced; Theora handles only progressive scan\n");
840         exit(1);
841       }
842
843       if(strcmp(chroma_type,"420")==0||strcmp(chroma_type,"420jpeg")==0){
844         src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=2;
845         y4m_dst_buf_read_sz=pic_w*pic_h+2*((pic_w+1)/2)*((pic_h+1)/2);
846         /*Natively supported: no conversion required.*/
847         y4m_aux_buf_sz=y4m_aux_buf_read_sz=0;
848         y4m_convert=y4m_convert_null;
849       }
850       else if(strcmp(chroma_type,"420mpeg2")==0){
851         src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=2;
852         y4m_dst_buf_read_sz=pic_w*pic_h;
853         /*Chroma filter required: read into the aux buf first.*/
854         y4m_aux_buf_sz=y4m_aux_buf_read_sz=2*((pic_w+1)/2)*((pic_h+1)/2);
855         y4m_convert=y4m_convert_42xmpeg2_42xjpeg;
856       }
857       else if(strcmp(chroma_type,"420paldv")==0){
858         src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=2;
859         y4m_dst_buf_read_sz=pic_w*pic_h;
860         /*Chroma filter required: read into the aux buf first.
861           We need to make two filter passes, so we need some extra space in the
862            aux buffer.*/
863         y4m_aux_buf_sz=3*((pic_w+1)/2)*((pic_h+1)/2);
864         y4m_aux_buf_read_sz=2*((pic_w+1)/2)*((pic_h+1)/2);
865         y4m_convert=y4m_convert_42xpaldv_42xjpeg;
866       }
867       else if(strcmp(chroma_type,"422")==0){
868         src_c_dec_h=dst_c_dec_h=2;
869         src_c_dec_v=dst_c_dec_v=1;
870         y4m_dst_buf_read_sz=pic_w*pic_h;
871         /*Chroma filter required: read into the aux buf first.*/
872         y4m_aux_buf_sz=y4m_aux_buf_read_sz=2*((pic_w+1)/2)*pic_h;
873         y4m_convert=y4m_convert_42xmpeg2_42xjpeg;
874       }
875       else if(strcmp(chroma_type,"422jpeg")==0){
876         src_c_dec_h=dst_c_dec_h=2;
877         src_c_dec_v=dst_c_dec_v=1;
878         y4m_dst_buf_read_sz=pic_w*pic_h+2*((pic_w+1)/2)*pic_h;
879         /*Natively supported: no conversion required.*/
880         y4m_aux_buf_sz=y4m_aux_buf_read_sz=0;
881         y4m_convert=y4m_convert_null;
882       }
883       else if(strcmp(chroma_type,"411")==0){
884         src_c_dec_h=4;
885         /*We don't want to introduce any additional sub-sampling, so we
886            promote 4:1:1 material to 4:2:2, as the closest format Theora can
887            handle.*/
888         dst_c_dec_h=2;
889         src_c_dec_v=dst_c_dec_v=1;
890         y4m_dst_buf_read_sz=pic_w*pic_h;
891         /*Chroma filter required: read into the aux buf first.*/
892         y4m_aux_buf_sz=y4m_aux_buf_read_sz=2*((pic_w+3)/4)*pic_h;
893         y4m_convert=y4m_convert_411_422jpeg;
894       }
895       else if(strcmp(chroma_type,"444")==0){
896         src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=1;
897         y4m_dst_buf_read_sz=pic_w*pic_h*3;
898         y4m_aux_buf_sz=y4m_aux_buf_read_sz=0;
899         y4m_convert=y4m_convert_null;
900       }
901       else if(strcmp(chroma_type,"444alpha")==0){
902         src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=1;
903         y4m_dst_buf_read_sz=pic_w*pic_h*3;
904         /*Read the extra alpha plane into the aux buf.
905           It will be discarded.*/
906         y4m_aux_buf_sz=y4m_aux_buf_read_sz=pic_w*pic_h;
907         y4m_convert=y4m_convert_null;
908       }
909       else if(strcmp(chroma_type,"mono")==0){
910         src_c_dec_h=src_c_dec_v=0;
911         dst_c_dec_h=dst_c_dec_v=2;
912         y4m_dst_buf_read_sz=pic_w*pic_h;
913         y4m_aux_buf_sz=y4m_aux_buf_read_sz=0;
914         y4m_convert=y4m_convert_mono_420jpeg;
915       }
916       else{
917         fprintf(stderr,"Unknown chroma sampling type: %s\n",chroma_type);
918         exit(1);
919       }
920       /*The size of the final frame buffers is always computed from the
921          destination chroma decimation type.*/
922       y4m_dst_buf_sz=pic_w*pic_h+2*((pic_w+dst_c_dec_h-1)/dst_c_dec_h)*
923        ((pic_h+dst_c_dec_v-1)/dst_c_dec_v);
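      /*E.g., 640x480 4:2:0 input gives 640*480+2*320*240=460800 bytes here;
         4:2:2 would give 640*480+2*320*480=614400 bytes.*/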
924
925       video=test;
926
927       fprintf(stderr,"File %s is %dx%d %.02f fps %s video.\n",
928               f,pic_w,pic_h,(double)video_fps_n/video_fps_d,chroma_type);
929
930       return;
931     }
932   }
933   fprintf(stderr,"Input file %s is neither a WAV nor YUV4MPEG2 file.\n",f);
934   exit(1);
935
936  riff_err:
937   fprintf(stderr,"EOF parsing RIFF file %s.\n",f);
938   exit(1);
939  yuv_err:
940   fprintf(stderr,"EOF parsing YUV4MPEG2 file %s.\n",f);
941   exit(1);
942
943 }
944
945 int spinner=0;
946 char *spinascii="|/-\\";
947 void spinnit(void){
948   spinner++;
949   if(spinner==4)spinner=0;
950   fprintf(stderr,"\r%c",spinascii[spinner]);
951 }
952
953 int fetch_and_process_audio(FILE *audio,ogg_page *audiopage,
954                             ogg_stream_state *vo,
955                             vorbis_dsp_state *vd,
956                             vorbis_block *vb,
957                             int audioflag){
958   static ogg_int64_t samples_sofar=0;
959   ogg_packet op;
960   int i,j;
961   ogg_int64_t beginsample = audio_hz*(begin_sec+begin_usec*.000001);
962   ogg_int64_t endsample = audio_hz*(end_sec+end_usec*.000001);
963
964   while(audio && !audioflag){
965     /* process any audio already buffered */
966     spinnit();
967     if(ogg_stream_pageout(vo,audiopage)>0) return 1;
968     if(ogg_stream_eos(vo))return 0;
969
970     {
971       /* read and process more audio */
972       signed char readbuffer[4096];
973       signed char *readptr=readbuffer;
974       int toread=4096/2/audio_ch;
975       int bytesread=fread(readbuffer,1,toread*2*audio_ch,audio);
976       int sampread=bytesread/2/audio_ch;
977       float **vorbis_buffer;
978       int count=0;
979
980       if(bytesread<=0 ||
981          (samples_sofar>=endsample && endsample>0)){
982         /* end of file.  this can be done implicitly, but it's
983            easier to see here in non-clever fashion.  Tell the
984            library we're at end of stream so that it can handle the
985            last frame and mark end of stream in the output properly */
986         vorbis_analysis_wrote(vd,0);
987       }else{
988         if(samples_sofar < beginsample){
989           if(samples_sofar+sampread > beginsample){
990             readptr += (beginsample-samples_sofar)*2*audio_ch;
991             sampread += samples_sofar-beginsample;
992             samples_sofar = sampread+beginsample;
993           }else{
994             samples_sofar += sampread;
995             sampread = 0;
996           }
997         }else{
998           samples_sofar += sampread;
999         }
1000
1001         if(samples_sofar > endsample && endsample > 0)
1002           sampread-= (samples_sofar - endsample);
1003
1004         if(sampread>0){
1005
1006           vorbis_buffer=vorbis_analysis_buffer(vd,sampread);
1007           /* uninterleave samples */
1008           for(i=0;i<sampread;i++){
1009             for(j=0;j<audio_ch;j++){
1010               vorbis_buffer[j][i]=((readptr[count+1]<<8)|
1011                                    (0x00ff&(int)readptr[count]))/32768.f;
1012               count+=2;
1013             }
1014           }
1015
1016           vorbis_analysis_wrote(vd,sampread);
1017         }
1018       }
1019
1020       while(vorbis_analysis_blockout(vd,vb)==1){
1021
1022         /* analysis, assume we want to use bitrate management */
1023         vorbis_analysis(vb,NULL);
1024         vorbis_bitrate_addblock(vb);
1025
1026         /* weld packets into the bitstream */
1027         while(vorbis_bitrate_flushpacket(vd,&op))
1028           ogg_stream_packetin(vo,&op);
1029
1030       }
1031     }
1032   }
1033
1034   return audioflag;
1035 }
1036
1037 static int                 frame_state=-1;
1038 static ogg_int64_t         frames=0;
1039 static unsigned char      *yuvframe[3];
1040 static th_ycbcr_buffer     ycbcr;
1041
1042 int fetch_and_process_video_packet(FILE *video,FILE *twopass_file,int passno,
1043  th_enc_ctx *td,ogg_packet *op){
1044   int                        ret;
1045   int                        pic_sz;
1046   int                        c_w;
1047   int                        c_h;
1048   int                        c_sz;
1049   ogg_int64_t                beginframe;
1050   ogg_int64_t                endframe;
1051   spinnit();
1052   beginframe=video_fps_n*(begin_sec+begin_usec*.000001)/video_fps_d;
1053   endframe=video_fps_n*(end_sec+end_usec*.000001)/video_fps_d;
1054   if(frame_state==-1){
1055     /* initialize the double frame buffer */
1056     yuvframe[0]=(unsigned char *)malloc(y4m_dst_buf_sz);
1057     yuvframe[1]=(unsigned char *)malloc(y4m_dst_buf_sz);
1058     yuvframe[2]=(unsigned char *)malloc(y4m_aux_buf_sz);
1059     frame_state=0;
1060   }
1061   pic_sz=pic_w*pic_h;
1062   c_w=(pic_w+dst_c_dec_h-1)/dst_c_dec_h;
1063   c_h=(pic_h+dst_c_dec_v-1)/dst_c_dec_v;
1064   c_sz=c_w*c_h;
1065   /* read and process more video */
1066   /* video strategy reads one frame ahead so we know when we're
1067      at end of stream and can mark last video frame as such
1068      (vorbis audio has to flush one frame past last video frame
1069      due to overlap and thus doesn't need this extra work */
1070
1071   /* have two frame buffers full (if possible) before
1072      proceeding.  after first pass and until eos, one will
1073      always be full when we get here */
1074   for(;frame_state<2 && (frames<endframe || endframe<0);){
1075     char c,frame[6];
1076     int ret=fread(frame,1,6,video);
1077     /* match and skip the frame header */
1078     if(ret<6)break;
1079     if(memcmp(frame,"FRAME",5)){
1080       fprintf(stderr,"Loss of framing in YUV input data\n");
1081       exit(1);
1082     }
1083     if(frame[5]!='\n'){
1084       int j;
1085       for(j=0;j<79;j++)
1086         if(fread(&c,1,1,video)&&c=='\n')break;
1087       if(j==79){
1088         fprintf(stderr,"Error parsing YUV frame header\n");
1089         exit(1);
1090       }
1091     }
1092     /*Read the frame data that needs no conversion.*/
1093     if(fread(yuvframe[frame_state],1,y4m_dst_buf_read_sz,video)!=
1094      y4m_dst_buf_read_sz){
1095       fprintf(stderr,"Error reading YUV frame data.\n");
1096       exit(1);
1097     }
1098     /*Read the frame data that does need conversion.*/
1099     if(fread(yuvframe[2],1,y4m_aux_buf_read_sz,video)!=y4m_aux_buf_read_sz){
1100       fprintf(stderr,"Error reading YUV frame data.\n");
1101       exit(1);
1102     }
1103     /*Now convert the just read frame.*/
1104     (*y4m_convert)(yuvframe[frame_state],yuvframe[2]);
1105     frames++;
1106     if(frames>=beginframe)
1107       frame_state++;
1108   }
1109   /* check to see if there are dupes to flush */
1110   if(th_encode_packetout(td,frame_state<1,op)>0)return 1;
1111   if(frame_state<1){
1112     /* can't get here unless YUV4MPEG stream has no video */
1113     fprintf(stderr,"Video input contains no frames.\n");
1114     exit(1);
1115   }
1116   /* Theora is a one-frame-in,one-frame-out system; submit a frame
1117      for compression and pull out the packet */
1118   /* in two-pass mode's second pass, we need to submit first-pass data */
1119   if(passno==2){
1120     for(;;){
1121       static unsigned char buffer[80];
1122       static int buf_pos;
1123       int bytes;
1124       /*Ask the encoder how many bytes it would like.*/
1125       bytes=th_encode_ctl(td,TH_ENCCTL_2PASS_IN,NULL,0);
1126       if(bytes<0){
1127         fprintf(stderr,"Error submitting pass data in second pass.\n");
1128         exit(1);
1129       }
1130       /*If it's got enough, stop.*/
1131       if(bytes==0)break;
1132       /*Read in some more bytes, if necessary.*/
1133       if(bytes>80-buf_pos)bytes=80-buf_pos;
1134       if(bytes>0&&fread(buffer+buf_pos,1,bytes,twopass_file)<bytes){
1135         fprintf(stderr,"Could not read frame data from two-pass data file!\n");
1136         exit(1);
1137       }
1138       /*And pass them off.*/
1139       ret=th_encode_ctl(td,TH_ENCCTL_2PASS_IN,buffer,bytes);
1140       if(ret<0){
1141         fprintf(stderr,"Error submitting pass data in second pass.\n");
1142         exit(1);
1143       }
1144       /*If the encoder consumed the whole buffer, reset it.*/
1145       if(ret>=bytes)buf_pos=0;
1146       /*Otherwise remember how much it used.*/
1147       else buf_pos+=ret;
1148     }
1149   }
1150   /*We submit the buffer using the size of the picture region.
1151     libtheora will pad the picture region out to the full frame size for us,
1152      whether we pass in a full frame or not.*/
1153   ycbcr[0].width=pic_w;
1154   ycbcr[0].height=pic_h;
1155   ycbcr[0].stride=pic_w;
1156   ycbcr[0].data=yuvframe[0];
1157   ycbcr[1].width=c_w;
1158   ycbcr[1].height=c_h;
1159   ycbcr[1].stride=c_w;
1160   ycbcr[1].data=yuvframe[0]+pic_sz;
1161   ycbcr[2].width=c_w;
1162   ycbcr[2].height=c_h;
1163   ycbcr[2].stride=c_w;
1164   ycbcr[2].data=yuvframe[0]+pic_sz+c_sz;
1165   th_encode_ycbcr_in(td,ycbcr);
1166   {
1167     unsigned char *temp=yuvframe[0];
1168     yuvframe[0]=yuvframe[1];
1169     yuvframe[1]=temp;
1170     frame_state--;
1171   }
1172   /* in two-pass mode's first pass we need to extract and save the pass data */
1173   if(passno==1){
1174     unsigned char *buffer;
1175     int bytes = th_encode_ctl(td, TH_ENCCTL_2PASS_OUT, &buffer, sizeof(buffer));
1176     if(bytes<0){
1177       fprintf(stderr,"Could not read two-pass data from encoder.\n");
1178       exit(1);
1179     }
1180     if(fwrite(buffer,1,bytes,twopass_file)<bytes){
1181       fprintf(stderr,"Unable to write to two-pass data file.\n");
1182       exit(1);
1183     }
1184     fflush(twopass_file);
1185   }
1186   /* if there was only one frame, it's the last in the stream */
1187   ret = th_encode_packetout(td,frame_state<1,op);
1188   if(passno==1 && frame_state<1){
1189     /* need to read the final (summary) packet */
1190     unsigned char *buffer;
1191     int bytes = th_encode_ctl(td, TH_ENCCTL_2PASS_OUT, &buffer, sizeof(buffer));
1192     if(bytes<0){
1193       fprintf(stderr,"Could not read two-pass summary data from encoder.\n");
1194       exit(1);
1195     }
1196     if(fseek(twopass_file,0,SEEK_SET)<0){
1197       fprintf(stderr,"Unable to seek in two-pass data file.\n");
1198       exit(1);
1199     }
1200     if(fwrite(buffer,1,bytes,twopass_file)<bytes){
1201       fprintf(stderr,"Unable to write to two-pass data file.\n");
1202       exit(1);
1203     }
1204     fflush(twopass_file);
1205   }
1206   return ret;
1207 }
1208
1209
1210 int fetch_and_process_video(FILE *video,ogg_page *videopage,
1211  ogg_stream_state *to,th_enc_ctx *td,FILE *twopass_file,int passno,
1212  int videoflag){
1213   ogg_packet op;
1214   int ret;
1215   /* is there a video page flushed?  If not, work until there is. */
1216   while(!videoflag){
1217     if(ogg_stream_pageout(to,videopage)>0) return 1;
1218     if(ogg_stream_eos(to)) return 0;
1219     ret=fetch_and_process_video_packet(video,twopass_file,passno,td,&op);
1220     if(ret<=0)return 0;
1221     ogg_stream_packetin(to,&op);
1222   }
1223   return videoflag;
1224 }
1225
1226 static int ilog(unsigned _v){
1227   int ret;
1228   for(ret=0;_v;ret++)_v>>=1;
1229   return ret;
1230 }
1231
1232 static int parse_time(long *_sec,long *_usec,const char *_optarg){
1233   double      secf;
1234   long        secl;
1235   const char *pos;
1236   char       *end;
1237   int         err;
1238   err=0;
1239   secl=0;
1240   pos=strchr(_optarg,':');
1241   if(pos!=NULL){
1242     char *pos2;
1243     secl=strtol(_optarg,&end,10)*60;
1244     err|=pos!=end;
1245     pos2=strchr(++pos,':');
1246     if(pos2!=NULL){
1247       secl=(secl+strtol(pos,&end,10))*60;
1248       err|=pos2!=end;
1249       pos=pos2+1;
1250     }
1251   }
1252   else pos=_optarg;
1253   secf=strtod(pos,&end);
1254   if(err||*end!='\0')return -1;
1255   *_sec=secl+(long)floor(secf);
1256   *_usec=(long)((secf-floor(secf))*1E6+0.5);
1257   return 0;
1258 }
1259
1260 int main(int argc,char *argv[]){
1261   int c,long_option_index,ret;
1262
1263   ogg_stream_state to; /* take physical pages, weld into a logical
1264                            stream of packets */
1265   ogg_stream_state vo; /* take physical pages, weld into a logical
1266                            stream of packets */
1267   ogg_page         og; /* one Ogg bitstream page.  Vorbis packets are inside */
1268   ogg_packet       op; /* one raw packet of data for decode */
1269
1270   th_enc_ctx      *td;
1271   th_info          ti;
1272   th_comment       tc;
1273
1274   vorbis_info      vi; /* struct that stores all the static vorbis bitstream
1275                           settings */
1276   vorbis_comment   vc; /* struct that stores all the user comments */
1277
1278   vorbis_dsp_state vd; /* central working state for the packet->PCM decoder */
1279   vorbis_block     vb; /* local working space for packet->PCM decode */
1280
1281   int speed=-1;
1282   int audioflag=0;
1283   int videoflag=0;
1284   int akbps=0;
1285   int vkbps=0;
1286   int soft_target=0;
1287
1288   ogg_int64_t audio_bytesout=0;
1289   ogg_int64_t video_bytesout=0;
1290   double timebase;
1291
1292   FILE *outfile = stdout;
1293
1294   FILE *twopass_file = NULL;
1295   fpos_t video_rewind_pos;
1296   int twopass=0;
1297   int passno;
1298
1299 #ifdef _WIN32 /* We need to set stdin/stdout to binary mode. Damn windows. */
1300   /* if we were reading/writing a file, it would also need to in
1301      binary mode, eg, fopen("file.wav","wb"); */
1302   /* Beware the evil ifdef. We avoid these where we can, but this one we
1303      cannot. Don't add any more, you'll probably go to hell if you do. */
1304   _setmode( _fileno( stdin ), _O_BINARY );
1305   _setmode( _fileno( stdout ), _O_BINARY );
1306 #endif
1307
1308   while((c=getopt_long(argc,argv,optstring,options,&long_option_index))!=EOF){
1309     switch(c){
1310     case 'o':
1311       outfile=fopen(optarg,"wb");
1312       if(outfile==NULL){
1313         fprintf(stderr,"Unable to open output file '%s'\n", optarg);
1314         exit(1);
1315       }
1316       break;;
1317
1318     case 'a':
1319       audio_q=(float)(atof(optarg)*.099);
1320       if(audio_q<-.1 || audio_q>1){
1321         fprintf(stderr,"Illegal audio quality (choose -1 through 10)\n");
1322         exit(1);
1323       }
1324       audio_r=-1;
1325       break;
1326
1327     case 'v':
1328       video_q=(int)rint(6.3*atof(optarg));
1329       if(video_q<0 || video_q>63){
1330         fprintf(stderr,"Illegal video quality (choose 0 through 10)\n");
1331         exit(1);
1332       }
1333       break;
1334
1335     case 'A':
1336       audio_r=(int)(atof(optarg)*1000);
1337       if(audio_r<=0){
1338         fprintf(stderr,"Illegal audio bitrate (choose > 0 please)\n");
1339         exit(1);
1340       }
1341       audio_q=-99;
1342       break;
1343
1344     case 'V':
1345       video_r=(int)rint(atof(optarg)*1000);
1346       if(video_r<1){
1347         fprintf(stderr,"Illegal video bitrate (choose > 0 please)\n");
1348         exit(1);
1349       }
1350      break;
1351
1352     case '\1':
1353       soft_target=1;
1354       break;
1355
1356     case 's':
1357       video_par_n=(int)rint(atof(optarg));
1358       break;
1359
1360     case 'S':
1361       video_par_d=(int)rint(atof(optarg));
1362       break;
1363
1364     case 'f':
1365       video_fps_n=(int)rint(atof(optarg));
1366       break;
1367
1368     case 'F':
1369       video_fps_d=(int)rint(atof(optarg));
1370       break;
1371
1372     case 'c':
1373       vp3_compatible=1;
1374       break;
1375
1376     case 'k':
1377       keyframe_frequency=rint(atof(optarg));
1378       if(keyframe_frequency<1 || keyframe_frequency>2147483647){
1379         fprintf(stderr,"Illegal keyframe frequency\n");
1380         exit(1);
1381       }
1382       break;
1383
1384     case 'd':
1385       buf_delay=atoi(optarg);
1386       if(buf_delay<=0){
1387         fprintf(stderr,"Illegal buffer delay\n");
1388         exit(1);
1389       }
1390       break;
1391
1392     case 'z':
1393       speed=atoi(optarg);
1394       if(speed<0){
1395         fprintf(stderr,"Illegal speed level\n");
1396         exit(1);
1397       }
1398       break;
1399
1400     case 'b':
1401       {
1402         if(parse_time(&begin_sec,&begin_usec,optarg)<0){
1403           fprintf(stderr,"Error parsing begin time '%s'.\n",optarg);
1404           exit(1);
1405         }
1406       }
1407       break;
1408     case 'e':
1409       {
1410         if(parse_time(&end_sec,&end_usec,optarg)<0){
1411           fprintf(stderr,"Error parsing end time '%s'.\n",optarg);
1412           exit(1);
1413         }
1414       }
1415       break;
1416     case '\2':
1417       twopass=3; /* perform both passes */
1418       twopass_file=tmpfile();
1419       if(!twopass_file){
1420         fprintf(stderr,"Unable to open temporary file for twopass data\n");
1421         exit(1);
1422       }
1423       break;
1424     case '\3':
1425       twopass=1; /* perform first pass */
1426       twopass_file=fopen(optarg,"wb");
1427       if(!twopass_file){
1428         fprintf(stderr,"Unable to open \'%s\' for twopass data\n",optarg);
1429         exit(1);
1430       }
1431       break;
1432     case '\4':
1433       twopass=2; /* perform second pass */
1434       twopass_file=fopen(optarg,"rb");
1435       if(!twopass_file){
1436         fprintf(stderr,"Unable to open twopass data file \'%s\'\n",optarg);
1437         exit(1);
1438       }
1439       break;
1440 #if defined(OC_COLLECT_METRICS)
1441     case 'm':
1442       if(th_encode_ctl(NULL,TH_ENCCTL_SET_METRICS_FILE,
1443        optarg,strlen(optarg)+1)){
1444         fprintf(stderr,"Unable to set metrics collection file name.\n");
1445         fprintf(stderr,"libtheora not compiled with OC_COLLECT_METRICS?\n");
1446         exit(1);
1447       }
1448       break;
1449 #endif
1450
1451     default:
1452       usage();
1453     }
1454   }
1455
1456   if(soft_target){
1457     if(video_r<=0){
1458       fprintf(stderr,"Soft rate target (--soft-target) requested without a bitrate (-V).\n");
1459       exit(1);
1460     }
1461     if(video_q==-1)
1462       video_q=0;
1463   }else{
1464     if(video_q==-1){
1465       if(video_r>0)
1466         video_q=0;
1467       else
1468         video_q=48;
1469     }
1470   }
1471
1472   if(keyframe_frequency<=0){
1473     /*Use a default keyframe frequency of 64 for 1-pass (streaming) mode, and
1474        256 for two-pass mode.*/
1475     keyframe_frequency=twopass?256:64;
1476   }
1477
1478   while(optind<argc){
1479     /* assume that anything following the options must be a filename */
1480     id_file(argv[optind]);
1481     optind++;
1482   }
1483
1484   if(twopass==3){
1485     /* verify that the input is seekable! */
1486     if(video){
1487       if(fseek(video,0,SEEK_CUR)){
1488         fprintf(stderr,"--two-pass (automatic two-pass) requires the video input\n"
1489                 "to be seekable.  For non-seekable input, encoder_example\n"
1490                 "must be run twice, first with the --first-pass option, then\n"
1491                 "with the --second-pass option.\n\n");
1492         exit(1);
1493       }
1494       if(fgetpos(video,&video_rewind_pos)<0){
1495         fprintf(stderr,"Unable to determine start position of video data.\n");
1496         exit(1);
1497       }
1498     }
1499   }
1500
1501   /* Set up Ogg output stream */
1502   srand(time(NULL));
1503   ogg_stream_init(&to,rand()); /* oops, add one to the above */
1504
1505   /* initialize Vorbis assuming we have audio to compress. */
1506   if(audio && twopass!=1){
1507     ogg_stream_init(&vo,rand());
1508     vorbis_info_init(&vi);
1509     if(audio_q>-99)
1510       ret = vorbis_encode_init_vbr(&vi,audio_ch,audio_hz,audio_q);
1511     else
1512       ret = vorbis_encode_init(&vi,audio_ch,audio_hz,-1,
1513                                (int)(64870*(ogg_int64_t)audio_r>>16),-1);
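         /*As with the video rate below, multiplying by 64870 and shifting down 16
            bits deducts roughly 1% from the requested bitrate for Ogg container
            overhead.*/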
1514     if(ret){
1515       fprintf(stderr,"The Vorbis encoder could not set up a mode according to\n"
1516               "the requested quality or bitrate.\n\n");
1517       exit(1);
1518     }
1519
1520     vorbis_comment_init(&vc);
1521     vorbis_analysis_init(&vd,&vi);
1522     vorbis_block_init(&vd,&vb);
1523   }
1524
1525   for(passno=(twopass==3?1:twopass);passno<=(twopass==3?2:twopass);passno++){
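         /*passno is 0 for a plain single-pass encode, 1 for --first-pass, 2 for
            --second-pass, and runs 1 then 2 when --two-pass does both in one go.*/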
1526     /* Set up Theora encoder */
1527     if(!video){
1528       fprintf(stderr,"No video files submitted for compression?\n");
1529       exit(1);
1530     }
1531     /* Theora has a divisible-by-sixteen restriction for the encoded frame size */
1532     /* scale the picture size up to the next multiple of 16 and calculate offsets */
1533     frame_w=(pic_w+15)&~0xF;
1534     frame_h=(pic_h+15)&~0xF;
1535     /*Force the offsets to be even so that chroma samples line up like we
1536        expect.*/
1537     pic_x=((frame_w-pic_w)>>1)&~1;
1538     pic_y=((frame_h-pic_h)>>1)&~1;
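         /*For example, a 1918x1080 picture is coded as a 1920x1088 frame with
            pic_x=0 and pic_y=4; players display only the original picture region.*/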
1539     th_info_init(&ti);
1540     ti.frame_width=frame_w;
1541     ti.frame_height=frame_h;
1542     ti.pic_width=pic_w;
1543     ti.pic_height=pic_h;
1544     ti.pic_x=pic_x;
1545     ti.pic_y=pic_y;
1546     ti.fps_numerator=video_fps_n;
1547     ti.fps_denominator=video_fps_d;
1548     ti.aspect_numerator=video_par_n;
1549     ti.aspect_denominator=video_par_d;
1550     ti.colorspace=TH_CS_UNSPECIFIED;
1551     /*Account for the Ogg page overhead.
1552       This is 1 byte per 255 for lacing values, plus 26 bytes per 4096 bytes for
1553        the page header, plus approximately 1/2 byte per packet (not accounted for
1554        here).*/
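         /*64870/65536 is roughly 0.99, so the shift-by-16 below reduces the
            requested rate by about 1% to leave room for that overhead.*/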
1555     ti.target_bitrate=(int)(64870*(ogg_int64_t)video_r>>16);
1556     ti.quality=video_q;
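         /*The granule shift is the number of granulepos bits reserved for counting
            frames since the last keyframe, so ilog(keyframe_frequency-1) is the
            smallest shift that can represent keyframe_frequency-1 inter frames
            (e.g. 6 bits for the default spacing of 64).*/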
1557     ti.keyframe_granule_shift=ilog(keyframe_frequency-1);
1558     if(dst_c_dec_h==2){
1559       if(dst_c_dec_v==2)ti.pixel_fmt=TH_PF_420;
1560       else ti.pixel_fmt=TH_PF_422;
1561     }
1562     else ti.pixel_fmt=TH_PF_444;
1563     td=th_encode_alloc(&ti);
1564     th_info_clear(&ti);
1565     /* setting just the granule shift only allows power-of-two keyframe
1566        spacing.  Set the actual requested spacing. */
1567     ret=th_encode_ctl(td,TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE,
1568      &keyframe_frequency,sizeof(keyframe_frequency));
1569     if(ret<0){
1570       fprintf(stderr,"Could not set keyframe interval to %d.\n",(int)keyframe_frequency);
1571     }
1572     if(vp3_compatible){
1573       ret=th_encode_ctl(td,TH_ENCCTL_SET_VP3_COMPATIBLE,&vp3_compatible,
1574        sizeof(vp3_compatible));
1575       if(ret<0||!vp3_compatible){
1576         fprintf(stderr,"Could not enable strict VP3 compatibility.\n");
1577         if(ret>=0){
1578           fprintf(stderr,"Ensure your source format is supported by VP3.\n");
1579           fprintf(stderr,
1580            "(4:2:0 pixel format, width and height multiples of 16).\n");
1581         }
1582       }
1583     }
1584     if(soft_target){
1585       /* reverse the rate control flags to favor a 'long time' strategy */
1586       int arg = TH_RATECTL_CAP_UNDERFLOW;
1587       ret=th_encode_ctl(td,TH_ENCCTL_SET_RATE_FLAGS,&arg,sizeof(arg));
1588       if(ret<0)
1589         fprintf(stderr,"Could not set encoder flags for --soft-target\n");
1590       /* Default buffer control is overridden on two-pass */
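           /*Size the rate buffer to the larger of 3.5 keyframe intervals or
              5 seconds of frames.*/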
1591       if(!twopass&&buf_delay<0){
1592         if((keyframe_frequency*7>>1) > 5*video_fps_n/video_fps_d)
1593           arg=keyframe_frequency*7>>1;
1594         else
1595           arg=5*video_fps_n/video_fps_d;
1596         ret=th_encode_ctl(td,TH_ENCCTL_SET_RATE_BUFFER,&arg,sizeof(arg));
1597         if(ret<0)
1598           fprintf(stderr,"Could not set rate control buffer for --soft-target\n");
1599       }
1600     }
1601     /* set up two-pass if needed */
1602     if(passno==1){
1603       unsigned char *buffer;
1604       int bytes;
1605       bytes=th_encode_ctl(td,TH_ENCCTL_2PASS_OUT,&buffer,sizeof(buffer));
1606       if(bytes<0){
1607         fprintf(stderr,"Could not set up the first pass of two-pass mode.\n");
1608         fprintf(stderr,"Did you remember to specify an estimated bitrate?\n");
1609         exit(1);
1610       }
1611       /*Perform a seek test to ensure we can overwrite this placeholder data at
1612          the end; this is better than letting the user sit through a whole
1613          encode only to find out their pass 1 file is useless at the end.*/
1614       if(fseek(twopass_file,0,SEEK_SET)<0){
1615         fprintf(stderr,"Unable to seek in two-pass data file.\n");
1616         exit(1);
1617       }
1618       if(fwrite(buffer,1,bytes,twopass_file)<bytes){
1619         fprintf(stderr,"Unable to write to two-pass data file.\n");
1620         exit(1);
1621       }
1622       fflush(twopass_file);
1623     }
1624     if(passno==2){
1625       /*Enable the second pass here.
1626         We make this call just to set the encoder into 2-pass mode, because
1627          by default enabling two-pass sets the buffer delay to the whole file
1628          (because there's no way to explicitly request that behavior).
1629         If we waited until we were actually encoding, it would overwrite our
1630          settings.*/
1631       if(th_encode_ctl(td,TH_ENCCTL_2PASS_IN,NULL,0)<0){
1632         fprintf(stderr,"Could not set up the second pass of two-pass mode.\n");
1633         exit(1);
1634       }
1635       if(twopass==3){
1636         /* 'automatic' second pass */
1637         if(fsetpos(video,&video_rewind_pos)<0){
1638           fprintf(stderr,"Could not rewind video input file for second pass!\n");
1639           exit(1);
1640         }
1641         if(fseek(twopass_file,0,SEEK_SET)<0){
1642           fprintf(stderr,"Unable to seek in two-pass data file.\n");
1643           exit(1);
1644         }
1645         frame_state=0;
1646         frames=0;
1647       }
1648     }
1649     /*Now we can set the buffer delay if the user requested a non-default one
1650        (this has to be done after two-pass is enabled).*/
1651     if(passno!=1&&buf_delay>=0){
1652       ret=th_encode_ctl(td,TH_ENCCTL_SET_RATE_BUFFER,
1653        &buf_delay,sizeof(buf_delay));
1654       if(ret<0){
1655         fprintf(stderr,"Warning: could not set desired buffer delay.\n");
1656       }
1657     }
1658     /*Speed should also be set after the current encoder mode is established,
1659        since the available speed levels may change depending on the mode.*/
1660     if(speed>=0){
1661       int speed_max;
1662       int ret;
1663       ret=th_encode_ctl(td,TH_ENCCTL_GET_SPLEVEL_MAX,
1664        &speed_max,sizeof(speed_max));
1665       if(ret<0){
1666         fprintf(stderr,"Warning: could not determine maximum speed level.\n");
1667         speed_max=0;
1668       }
1669       ret=th_encode_ctl(td,TH_ENCCTL_SET_SPLEVEL,&speed,sizeof(speed));
1670       if(ret<0){
1671         fprintf(stderr,"Warning: could not set speed level to %i of %i\n",
1672          speed,speed_max);
1673         if(speed>speed_max){
1674           fprintf(stderr,"Setting it to %i instead\n",speed_max);
1675         }
1676         ret=th_encode_ctl(td,TH_ENCCTL_SET_SPLEVEL,
1677          &speed_max,sizeof(speed_max));
1678         if(ret<0){
1679           fprintf(stderr,"Warning: could not set speed level to %i of %i\n",
1680            speed_max,speed_max);
1681         }
1682       }
1683     }
1684     /* write the bitstream header packets with proper page interleave */
1685     th_comment_init(&tc);
1686     /* first packet will get its own page automatically */
1687     if(th_encode_flushheader(td,&tc,&op)<=0){
1688       fprintf(stderr,"Internal Theora library error.\n");
1689       exit(1);
1690     }
1691     if(passno!=1){
1692       ogg_stream_packetin(&to,&op);
1693       if(ogg_stream_pageout(&to,&og)!=1){
1694         fprintf(stderr,"Internal Ogg library error.\n");
1695         exit(1);
1696       }
1697       fwrite(og.header,1,og.header_len,outfile);
1698       fwrite(og.body,1,og.body_len,outfile);
1699     }
1700     /* create the remaining theora headers */
1701     for(;;){
1702       ret=th_encode_flushheader(td,&tc,&op);
1703       if(ret<0){
1704         fprintf(stderr,"Internal Theora library error.\n");
1705         exit(1);
1706       }
1707       else if(!ret)break;
1708       if(passno!=1)ogg_stream_packetin(&to,&op);
1709     }
1710     if(audio && passno!=1){
1711       ogg_packet header;
1712       ogg_packet header_comm;
1713       ogg_packet header_code;
1714       vorbis_analysis_headerout(&vd,&vc,&header,&header_comm,&header_code);
1715       ogg_stream_packetin(&vo,&header); /* automatically placed in its own
1716                                            page */
1717       if(ogg_stream_pageout(&vo,&og)!=1){
1718         fprintf(stderr,"Internal Ogg library error.\n");
1719         exit(1);
1720       }
1721       fwrite(og.header,1,og.header_len,outfile);
1722       fwrite(og.body,1,og.body_len,outfile);
1723       /* remaining vorbis header packets */
1724       ogg_stream_packetin(&vo,&header_comm);
1725       ogg_stream_packetin(&vo,&header_code);
1726     }
1727     /* Flush the rest of our headers. This ensures
1728        the actual data in each stream will start
1729        on a new page, as per spec. */
1730     if(passno!=1){
1731       for(;;){
1732         int result = ogg_stream_flush(&to,&og);
1733         if(result<0){
1734           /* can't get here */
1735           fprintf(stderr,"Internal Ogg library error.\n");
1736           exit(1);
1737         }
1738         if(result==0)break;
1739         fwrite(og.header,1,og.header_len,outfile);
1740         fwrite(og.body,1,og.body_len,outfile);
1741       }
1742     }
1743     if(audio && passno!=1){
1744       for(;;){
1745         int result=ogg_stream_flush(&vo,&og);
1746         if(result<0){
1747           /* can't get here */
1748           fprintf(stderr,"Internal Ogg library error.\n");
1749           exit(1);
1750         }
1751         if(result==0)break;
1752         fwrite(og.header,1,og.header_len,outfile);
1753         fwrite(og.body,1,og.body_len,outfile);
1754       }
1755     }
1756     /* setup complete.  Raw processing loop */
1757     switch(passno){
1758     case 0: case 2:
1759       fprintf(stderr,"\rCompressing....                                          \n");
1760       break;
1761     case 1:
1762       fprintf(stderr,"\rScanning first pass....                                  \n");
1763       break;
1764     }
1765     for(;;){
1766       int audio_or_video=-1;
1767       if(passno==1){
1768         ogg_packet op;
1769         int ret=fetch_and_process_video_packet(video,twopass_file,passno,td,&op);
1770         if(ret<0)break;
1771         if(op.e_o_s)break; /* end of stream */
1772         timebase=th_granule_time(td,op.granulepos);
1773         audio_or_video=1;
1774       }else{
1775         double audiotime;
1776         double videotime;
1777         ogg_page audiopage;
1778         ogg_page videopage;
1779         /* is there an audio page flushed?  If not, fetch one if possible */
1780         audioflag=fetch_and_process_audio(audio,&audiopage,&vo,&vd,&vb,audioflag);
1781         /* is there a video page flushed?  If not, fetch one if possible */
1782         videoflag=fetch_and_process_video(video,&videopage,&to,td,twopass_file,passno,videoflag);
1783         /* no pages of either?  Must be end of stream. */
1784         if(!audioflag && !videoflag)break;
1785         /* which is earlier; the end of the audio page or the end of the
1786            video page? Flush the earlier to stream */
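             /*Writing whichever page ends earlier keeps the muxed pages in rough
                chronological order, so playback never has to read far ahead in one
                stream to find data for the other.*/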
1787         audiotime=audioflag?
1788          vorbis_granule_time(&vd,ogg_page_granulepos(&audiopage)):-1;
1789         videotime=videoflag?
1790          th_granule_time(td,ogg_page_granulepos(&videopage)):-1;
1791         if(!audioflag){
1792           audio_or_video=1;
1793         } else if(!videoflag) {
1794           audio_or_video=0;
1795         } else {
1796           if(audiotime<videotime)
1797             audio_or_video=0;
1798           else
1799             audio_or_video=1;
1800         }
1801         if(audio_or_video==1){
1802           /* flush a video page */
1803           video_bytesout+=fwrite(videopage.header,1,videopage.header_len,outfile);
1804           video_bytesout+=fwrite(videopage.body,1,videopage.body_len,outfile);
1805           videoflag=0;
1806           timebase=videotime;
1807         }else{
1808           /* flush an audio page */
1809           audio_bytesout+=fwrite(audiopage.header,1,audiopage.header_len,outfile);
1810           audio_bytesout+=fwrite(audiopage.body,1,audiopage.body_len,outfile);
1811           audioflag=0;
1812           timebase=audiotime;
1813         }
1814       }
1815       if(timebase > 0){
1816         int hundredths=(int)(timebase*100-(long)timebase*100);
1817         int seconds=(long)timebase%60;
1818         int minutes=((long)timebase/60)%60;
1819         int hours=(long)timebase/3600;
1820         if(audio_or_video)vkbps=(int)rint(video_bytesout*8./timebase*.001);
1821         else akbps=(int)rint(audio_bytesout*8./timebase*.001);
1822         fprintf(stderr,
1823                 "\r      %d:%02d:%02d.%02d audio: %dkbps video: %dkbps                 ",
1824                 hours,minutes,seconds,hundredths,akbps,vkbps);
1825       }
1826     }
1827     if(video)th_encode_free(td);
1828   }
1829
1830   /* clear out state */
1831   if(audio && twopass!=1){
1832     ogg_stream_clear(&vo);
1833     vorbis_block_clear(&vb);
1834     vorbis_dsp_clear(&vd);
1835     vorbis_comment_clear(&vc);
1836     vorbis_info_clear(&vi);
1837     if(audio!=stdin)fclose(audio);
1838   }
1839   if(video){
1840     ogg_stream_clear(&to);
1841     th_comment_clear(&tc);
1842     if(video!=stdin)fclose(video);
1843   }
1844
1845   if(outfile && outfile!=stdout)fclose(outfile);
1846   if(twopass_file)fclose(twopass_file);
1847
1848   fprintf(stderr,"\r   \ndone.\n\n");
1849
1850   return(0);
1851
1852 }