3d310b29276e87c31de058fd883b0dc5075538c7
[theora.git] / lib / decode.c
1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10  *                                                                  *
11  ********************************************************************
12
13   function:
14     last mod: $Id$
15
16  ********************************************************************/
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ogg/ogg.h>
21 #include "decint.h"
22 #if defined(OC_DUMP_IMAGES)
23 # include <stdio.h>
24 # include "png.h"
25 #endif
26 #if defined(HAVE_CAIRO)
27 # include <cairo.h>
28 #endif
29
30
/*Post-processing levels.
  The values are consecutive integers running from OC_PP_LEVEL_DISABLED (0) up
   to OC_PP_LEVEL_MAX (7); oc_dec_init() starts every decoder at
   OC_PP_LEVEL_DISABLED.
  NOTE(review): Each level presumably enables its own stage in addition to all
   of the lower ones; confirm against the post-processing code.*/
/*No post-processing.*/
#define OC_PP_LEVEL_DISABLED  (0)
/*Keep track of DC qi for each block only.*/
#define OC_PP_LEVEL_TRACKDCQI (1)
/*Deblock the luma plane.*/
#define OC_PP_LEVEL_DEBLOCKY  (2)
/*Dering the luma plane.*/
#define OC_PP_LEVEL_DERINGY   (3)
/*Stronger luma plane deringing.*/
#define OC_PP_LEVEL_SDERINGY  (4)
/*Deblock the chroma planes.*/
#define OC_PP_LEVEL_DEBLOCKC  (5)
/*Dering the chroma planes.*/
#define OC_PP_LEVEL_DERINGC   (6)
/*Stronger chroma plane deringing.*/
#define OC_PP_LEVEL_SDERINGC  (7)
/*Maximum valid post-processing level.
  This has the same value as OC_PP_LEVEL_SDERINGC.*/
#define OC_PP_LEVEL_MAX       (7)
49
50
51
/*The mode alphabets for the various mode coding schemes.
  Scheme 0 uses a custom alphabet, which is not stored in this table.
  Row i holds the alphabet for coding scheme i+1: oc_dec_mb_modes_unpack()
   selects a row with mode_scheme-1.
  Each row maps a decoded mode index (0...OC_NMODES-1) to a macro block mode.
  The last row (index 6) is also the order in which scheme 0 assigns modes to
   the indices it reads from the bitstream.*/
static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
  /*Last MV dominates */
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
  },
  /*No MV dominates.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Default ordering.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  }
};
94
95
96 /*The original DCT tokens are extended and reordered during the construction of
97    the Huffman tables.
98   The extension means more bits can be read with fewer calls to the bitpacker
99    during the Huffman decoding process (at the cost of larger Huffman tables),
100    and fewer tokens require additional extra bits (reducing the average storage
101    per decoded token).
102   The revised ordering reveals essential information in the token value
103    itself; specifically, whether or not there are additional extra bits to read
104    and the parameter to which those extra bits are applied.
105   The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
106   The extra bits are added into code word at the bit position inferred from the
107    token value, giving the final code word from which all required parameters
108    are derived.
109   The number of EOBs and the leading zero run length can be extracted directly.
110   The coefficient magnitude is optionally negated before extraction, according
111    to a 'flip' bit.*/
112
/*The number of additional extra bits that are decoded with each of the
   internal DCT tokens.
  Only the first 15 internal tokens take extra bits, so the table has one
   entry for each of them; see OC_DCT_TOKEN_NEEDS_MORE().*/
static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
};

/*Whether or not an internal token needs any additional extra bits.
  This is true exactly when the token indexes into
   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
  The argument is parenthesized so that a compound expression (e.g., one using
   ?:) is compared as a whole.*/
#define OC_DCT_TOKEN_NEEDS_MORE(token) \
 ((token)<(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))

/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/
#define OC_DCT_TOKEN_FAT_EOB (0)

/*The number of EOBs to use for an end-of-frame token.
  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
   is not yet available everywhere; this should be equivalent.
  The value is all bits of a size_t set except the top one.*/
#define OC_DCT_EOB_FINISH ((~(size_t)0)>>1)

/*The location of the (6) run length bits in the code word.
  These are placed at index 0 and given 8 bits (even though 6 would suffice)
   because it may be faster to extract the lower byte on some platforms.*/
#define OC_DCT_CW_RLEN_SHIFT (0)
/*The location of the (12) EOB bits in the code word.*/
#define OC_DCT_CW_EOB_SHIFT  (8)
/*The location of the (1) flip bit in the code word.
  This must be right under the magnitude bits.*/
#define OC_DCT_CW_FLIP_BIT   (20)
/*The location of the (11) token magnitude bits in the code word.
  These must be last, and rely on a sign-extending right shift.*/
#define OC_DCT_CW_MAG_SHIFT  (21)

/*Pack the given fields into a code word.
  The flip bit is subtracted from the magnitude before the magnitude is
   shifted into place.
  The grouping is spelled out with parentheses; it is the same grouping that
   operator precedence gives the unparenthesized form.*/
#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
 (((_eobs)<<OC_DCT_CW_EOB_SHIFT)| \
 ((_rlen)<<OC_DCT_CW_RLEN_SHIFT)| \
 ((_flip)<<OC_DCT_CW_FLIP_BIT)| \
 (((_mag)-(_flip))<<OC_DCT_CW_MAG_SHIFT))

/*A special code word value that signals the end of the frame (a long EOB run
   of zero).*/
#define OC_DCT_CW_FINISH (0)

/*The position at which to insert the extra bits in the code word.
  We use this formulation because Intel has no useful cmov.
  A real architecture would probably do better with two of those.
  This translates to 11 instructions(!), and is _still_ faster than either a
   table lookup (just barely) or the naive double-ternary implementation (which
   gcc translates to a jump and a cmov).
  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
   you want to make one of the other shifts zero.
  The result is OC_DCT_CW_EOB_SHIFT for tokens 0 and 1, OC_DCT_CW_MAG_SHIFT
   for tokens 2 through 11, and 0 for everything else.*/
#define OC_DCT_TOKEN_EB_POS(_token) \
 (((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT)&-((_token)<2)) \
 +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
167
/*The code words for each internal token.
  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
   order.
  Each entry is built with OC_DCT_CW_PACK(eobs,rlen,mag,flip).
  The first 15 entries (indices 0...14) are the tokens that take additional
   extra bits, in the same order as OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
  Within those, indices 0...1 extend the EOB count, 2...11 the magnitude, and
   12...14 the run length, matching the ranges tested by
   OC_DCT_TOKEN_EB_POS().*/
static const ogg_int32_t OC_DCT_CODE_WORD[92]={
  /*These tokens require additional extra bits for the EOB count.*/
  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
  OC_DCT_CW_FINISH,
  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
  OC_DCT_CW_PACK(16, 0,  0,0),
  /*These tokens require additional extra bits for the magnitude.*/
  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 13,0),
  OC_DCT_CW_PACK( 0, 0, 13,1),
  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 21,0),
  OC_DCT_CW_PACK( 0, 0, 21,1),
  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 37,0),
  OC_DCT_CW_PACK( 0, 0, 37,1),
  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, 69,0),
  OC_DCT_CW_PACK( 0, 0,325,0),
  OC_DCT_CW_PACK( 0, 0, 69,1),
  OC_DCT_CW_PACK( 0, 0,325,1),
  /*These tokens require additional extra bits for the run length.*/
  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0,10, +1,0),
  OC_DCT_CW_PACK( 0,10, -1,0),
  /*OC_DCT_ZRL_TOKEN (6 extra bits)
    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0,  0,1),
  /*The remaining tokens require no additional extra bits.*/
  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 1, 0,  0,0),
  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 2, 0,  0,0),
  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 3, 0,  0,0),
  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
  OC_DCT_CW_PACK( 0, 1, +1,0),
  OC_DCT_CW_PACK( 0, 1, -1,0),
  OC_DCT_CW_PACK( 0, 2, +1,0),
  OC_DCT_CW_PACK( 0, 2, -1,0),
  OC_DCT_CW_PACK( 0, 3, +1,0),
  OC_DCT_CW_PACK( 0, 3, -1,0),
  OC_DCT_CW_PACK( 0, 4, +1,0),
  OC_DCT_CW_PACK( 0, 4, -1,0),
  OC_DCT_CW_PACK( 0, 5, +1,0),
  OC_DCT_CW_PACK( 0, 5, -1,0),
  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 1, +2,0),
  OC_DCT_CW_PACK( 0, 1, +3,0),
  OC_DCT_CW_PACK( 0, 1, -2,0),
  OC_DCT_CW_PACK( 0, 1, -3,0),
  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 6, +1,0),
  OC_DCT_CW_PACK( 0, 7, +1,0),
  OC_DCT_CW_PACK( 0, 8, +1,0),
  OC_DCT_CW_PACK( 0, 9, +1,0),
  OC_DCT_CW_PACK( 0, 6, -1,0),
  OC_DCT_CW_PACK( 0, 7, -1,0),
  OC_DCT_CW_PACK( 0, 8, -1,0),
  OC_DCT_CW_PACK( 0, 9, -1,0),
  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 2, +2,0),
  OC_DCT_CW_PACK( 0, 3, +2,0),
  OC_DCT_CW_PACK( 0, 2, +3,0),
  OC_DCT_CW_PACK( 0, 3, +3,0),
  OC_DCT_CW_PACK( 0, 2, -2,0),
  OC_DCT_CW_PACK( 0, 3, -2,0),
  OC_DCT_CW_PACK( 0, 2, -3,0),
  OC_DCT_CW_PACK( 0, 3, -3,0),
  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0,  0,1),
  OC_DCT_CW_PACK( 0, 1,  0,0),
  OC_DCT_CW_PACK( 0, 2,  0,0),
  OC_DCT_CW_PACK( 0, 3,  0,0),
  OC_DCT_CW_PACK( 0, 4,  0,0),
  OC_DCT_CW_PACK( 0, 5,  0,0),
  OC_DCT_CW_PACK( 0, 6,  0,0),
  OC_DCT_CW_PACK( 0, 7,  0,0),
  /*OC_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +1,0),
  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -1,0),
  /*OC_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +2,0),
  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -2,0),
  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
  OC_DCT_CW_PACK( 0, 0, +3,0),
  OC_DCT_CW_PACK( 0, 0, -3,0),
  OC_DCT_CW_PACK( 0, 0, +4,0),
  OC_DCT_CW_PACK( 0, 0, -4,0),
  OC_DCT_CW_PACK( 0, 0, +5,0),
  OC_DCT_CW_PACK( 0, 0, -5,0),
  OC_DCT_CW_PACK( 0, 0, +6,0),
  OC_DCT_CW_PACK( 0, 0, -6,0),
  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, +7,0),
  OC_DCT_CW_PACK( 0, 0, +8,0),
  OC_DCT_CW_PACK( 0, 0, -7,0),
  OC_DCT_CW_PACK( 0, 0, -8,0),
  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 0, +9,0),
  OC_DCT_CW_PACK( 0, 0,+10,0),
  OC_DCT_CW_PACK( 0, 0,+11,0),
  OC_DCT_CW_PACK( 0, 0,+12,0),
  OC_DCT_CW_PACK( 0, 0, -9,0),
  OC_DCT_CW_PACK( 0, 0,-10,0),
  OC_DCT_CW_PACK( 0, 0,-11,0),
  OC_DCT_CW_PACK( 0, 0,-12,0),
  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 8, 0,  0,0),
  OC_DCT_CW_PACK( 9, 0,  0,0),
  OC_DCT_CW_PACK(10, 0,  0,0),
  OC_DCT_CW_PACK(11, 0,  0,0),
  OC_DCT_CW_PACK(12, 0,  0,0),
  OC_DCT_CW_PACK(13, 0,  0,0),
  OC_DCT_CW_PACK(14, 0,  0,0),
  OC_DCT_CW_PACK(15, 0,  0,0),
  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 4, 0,  0,0),
  OC_DCT_CW_PACK( 5, 0,  0,0),
  OC_DCT_CW_PACK( 6, 0,  0,0),
  OC_DCT_CW_PACK( 7, 0,  0,0),
};
296
297
298
299 static int oc_sb_run_unpack(oc_pack_buf *_opb){
300   /*Coding scheme:
301        Codeword            Run Length
302      0                       1
303      10x                     2-3
304      110x                    4-5
305      1110xx                  6-9
306      11110xxx                10-17
307      111110xxxx              18-33
308      111111xxxxxxxxxxxx      34-4129*/
309   static const ogg_int16_t OC_SB_RUN_TREE[22]={
310     4,
311      -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
312      -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
313      -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
314      -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
315       2,
316        -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),-(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
317   };
318   int ret;
319   ret=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
320   if(ret>=0x10){
321     int offs;
322     offs=ret&0x1F;
323     ret=6+offs+(int)oc_pack_read(_opb,ret-offs>>4);
324   }
325   return ret;
326 }
327
328 static int oc_block_run_unpack(oc_pack_buf *_opb){
329   /*Coding scheme:
330      Codeword             Run Length
331      0x                      1-2
332      10x                     3-4
333      110x                    5-6
334      1110xx                  7-10
335      11110xx                 11-14
336      11111xxxx               15-30*/
337   static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
338     5,
339      -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
340      -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
341      -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
342      -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
343      -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
344      -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
345      -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
346      33,       36,       39,       44,
347       1,-(1<<8|7),-(1<<8|8),
348       1,-(1<<8|9),-(1<<8|10),
349       2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
350       4,
351        -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
352        -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
353        -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
354        -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
355   };
356   return oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
357 }
358
359
360
361 void oc_dec_accel_init_c(oc_dec_ctx *_dec){
362 # if defined(OC_DEC_USE_VTABLE)
363   _dec->opt_vtable.dc_unpredict_mcu_plane=
364    oc_dec_dc_unpredict_mcu_plane_c;
365 # endif
366 }
367
368 static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
369  const th_setup_info *_setup){
370   int qti;
371   int pli;
372   int qi;
373   int ret;
374   ret=oc_state_init(&_dec->state,_info,3);
375   if(ret<0)return ret;
376   ret=oc_huff_trees_copy(_dec->huff_tables,
377    (const ogg_int16_t *const *)_setup->huff_tables);
378   if(ret<0){
379     oc_state_clear(&_dec->state);
380     return ret;
381   }
382   /*For each fragment, allocate one byte for every DCT coefficient token, plus
383      one byte for extra-bits for each token, plus one more byte for the long
384      EOB run, just in case it's the very last token and has a run length of
385      one.*/
386   _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
387    _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
388   if(_dec->dct_tokens==NULL){
389     oc_huff_trees_clear(_dec->huff_tables);
390     oc_state_clear(&_dec->state);
391     return TH_EFAULT;
392   }
393   for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
394     _dec->state.dequant_tables[qi][pli][qti]=
395      _dec->state.dequant_table_data[qi][pli][qti];
396   }
397   oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
398    &_setup->qinfo);
399   for(qi=0;qi<64;qi++){
400     int qsum;
401     qsum=0;
402     for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
403       qsum+=_dec->state.dequant_tables[qti][pli][qi][12]+
404        _dec->state.dequant_tables[qti][pli][qi][17]+
405        _dec->state.dequant_tables[qti][pli][qi][18]+
406        _dec->state.dequant_tables[qti][pli][qi][24]<<(pli==0);
407     }
408     _dec->pp_sharp_mod[qi]=-(qsum>>11);
409   }
410   memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
411    sizeof(_dec->state.loop_filter_limits));
412   oc_dec_accel_init(_dec);
413   _dec->pp_level=OC_PP_LEVEL_DISABLED;
414   _dec->dc_qis=NULL;
415   _dec->variances=NULL;
416   _dec->pp_frame_data=NULL;
417   _dec->stripe_cb.ctx=NULL;
418   _dec->stripe_cb.stripe_decoded=NULL;
419 #if defined(HAVE_CAIRO)
420   _dec->telemetry=0;
421   _dec->telemetry_bits=0;
422   _dec->telemetry_qi=0;
423   _dec->telemetry_mbmode=0;
424   _dec->telemetry_mv=0;
425   _dec->telemetry_frame_data=NULL;
426 #endif
427   return 0;
428 }
429
430 static void oc_dec_clear(oc_dec_ctx *_dec){
431 #if defined(HAVE_CAIRO)
432   _ogg_free(_dec->telemetry_frame_data);
433 #endif
434   _ogg_free(_dec->pp_frame_data);
435   _ogg_free(_dec->variances);
436   _ogg_free(_dec->dc_qis);
437   _ogg_free(_dec->dct_tokens);
438   oc_huff_trees_clear(_dec->huff_tables);
439   oc_state_clear(&_dec->state);
440 }
441
442
443 static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
444   long val;
445   /*Check to make sure this is a data packet.*/
446   val=oc_pack_read1(&_dec->opb);
447   if(val!=0)return TH_EBADPACKET;
448   /*Read in the frame type (I or P).*/
449   val=oc_pack_read1(&_dec->opb);
450   _dec->state.frame_type=(int)val;
451   /*Read in the qi list.*/
452   val=oc_pack_read(&_dec->opb,6);
453   _dec->state.qis[0]=(unsigned char)val;
454   val=oc_pack_read1(&_dec->opb);
455   if(!val)_dec->state.nqis=1;
456   else{
457     val=oc_pack_read(&_dec->opb,6);
458     _dec->state.qis[1]=(unsigned char)val;
459     val=oc_pack_read1(&_dec->opb);
460     if(!val)_dec->state.nqis=2;
461     else{
462       val=oc_pack_read(&_dec->opb,6);
463       _dec->state.qis[2]=(unsigned char)val;
464       _dec->state.nqis=3;
465     }
466   }
467   if(_dec->state.frame_type==OC_INTRA_FRAME){
468     /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
469       Most of the other unused bits in the VP3 headers were eliminated.
470       I don't know why these remain.*/
471     /*I wanted to eliminate wasted bits, but not all config wiggle room
472        --Monty.*/
473     val=oc_pack_read(&_dec->opb,3);
474     if(val!=0)return TH_EIMPL;
475   }
476   return 0;
477 }
478
479 /*Mark all fragments as coded and in OC_MODE_INTRA.
480   This also builds up the coded fragment list (in coded order), and clears the
481    uncoded fragment list.
482   It does not update the coded macro block list nor the super block flags, as
483    those are not used when decoding INTRA frames.*/
484 static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
485   const oc_sb_map   *sb_maps;
486   const oc_sb_flags *sb_flags;
487   oc_fragment       *frags;
488   ptrdiff_t         *coded_fragis;
489   ptrdiff_t          ncoded_fragis;
490   ptrdiff_t          prev_ncoded_fragis;
491   unsigned           nsbs;
492   unsigned           sbi;
493   int                pli;
494   coded_fragis=_dec->state.coded_fragis;
495   prev_ncoded_fragis=ncoded_fragis=0;
496   sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
497   sb_flags=_dec->state.sb_flags;
498   frags=_dec->state.frags;
499   sbi=nsbs=0;
500   for(pli=0;pli<3;pli++){
501     nsbs+=_dec->state.fplanes[pli].nsbs;
502     for(;sbi<nsbs;sbi++){
503       int quadi;
504       for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
505         int bi;
506         for(bi=0;bi<4;bi++){
507           ptrdiff_t fragi;
508           fragi=sb_maps[sbi][quadi][bi];
509           if(fragi>=0){
510             frags[fragi].coded=1;
511             frags[fragi].refi=OC_FRAME_SELF;
512             frags[fragi].mb_mode=OC_MODE_INTRA;
513             coded_fragis[ncoded_fragis++]=fragi;
514           }
515         }
516       }
517     }
518     _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
519     prev_ncoded_fragis=ncoded_fragis;
520   }
521   _dec->state.ntotal_coded_fragis=ncoded_fragis;
522 }
523
524 /*Decodes the bit flags indicating whether each super block is partially coded
525    or not.
526   Return: The number of partially coded super blocks.*/
527 static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
528   oc_sb_flags *sb_flags;
529   unsigned     nsbs;
530   unsigned     sbi;
531   unsigned     npartial;
532   unsigned     run_count;
533   long         val;
534   int          flag;
535   val=oc_pack_read1(&_dec->opb);
536   flag=(int)val;
537   sb_flags=_dec->state.sb_flags;
538   nsbs=_dec->state.nsbs;
539   sbi=npartial=0;
540   while(sbi<nsbs){
541     int full_run;
542     run_count=oc_sb_run_unpack(&_dec->opb);
543     full_run=run_count>=4129;
544     do{
545       sb_flags[sbi].coded_partially=flag;
546       sb_flags[sbi].coded_fully=0;
547       npartial+=flag;
548       sbi++;
549     }
550     while(--run_count>0&&sbi<nsbs);
551     if(full_run&&sbi<nsbs){
552       val=oc_pack_read1(&_dec->opb);
553       flag=(int)val;
554     }
555     else flag=!flag;
556   }
557   /*TODO: run_count should be 0 here.
558     If it's not, we should issue a warning of some kind.*/
559   return npartial;
560 }
561
562 /*Decodes the bit flags for whether or not each non-partially-coded super
563    block is fully coded or not.
564   This function should only be called if there is at least one
565    non-partially-coded super block.
566   Return: The number of partially coded super blocks.*/
567 static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
568   oc_sb_flags *sb_flags;
569   unsigned     nsbs;
570   unsigned     sbi;
571   unsigned     run_count;
572   long         val;
573   int          flag;
574   sb_flags=_dec->state.sb_flags;
575   nsbs=_dec->state.nsbs;
576   /*Skip partially coded super blocks.*/
577   for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
578   val=oc_pack_read1(&_dec->opb);
579   flag=(int)val;
580   do{
581     int full_run;
582     run_count=oc_sb_run_unpack(&_dec->opb);
583     full_run=run_count>=4129;
584     for(;sbi<nsbs;sbi++){
585       if(sb_flags[sbi].coded_partially)continue;
586       if(run_count--<=0)break;
587       sb_flags[sbi].coded_fully=flag;
588     }
589     if(full_run&&sbi<nsbs){
590       val=oc_pack_read1(&_dec->opb);
591       flag=(int)val;
592     }
593     else flag=!flag;
594   }
595   while(sbi<nsbs);
596   /*TODO: run_count should be 0 here.
597     If it's not, we should issue a warning of some kind.*/
598 }
599
600 static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
601   const oc_sb_map   *sb_maps;
602   const oc_sb_flags *sb_flags;
603   oc_fragment       *frags;
604   unsigned           nsbs;
605   unsigned           sbi;
606   unsigned           npartial;
607   long               val;
608   int                pli;
609   int                flag;
610   int                run_count;
611   ptrdiff_t         *coded_fragis;
612   ptrdiff_t         *uncoded_fragis;
613   ptrdiff_t          ncoded_fragis;
614   ptrdiff_t          nuncoded_fragis;
615   ptrdiff_t          prev_ncoded_fragis;
616   npartial=oc_dec_partial_sb_flags_unpack(_dec);
617   if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
618   if(npartial>0){
619     val=oc_pack_read1(&_dec->opb);
620     flag=!(int)val;
621   }
622   else flag=0;
623   sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
624   sb_flags=_dec->state.sb_flags;
625   frags=_dec->state.frags;
626   sbi=nsbs=run_count=0;
627   coded_fragis=_dec->state.coded_fragis;
628   uncoded_fragis=coded_fragis+_dec->state.nfrags;
629   prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0;
630   for(pli=0;pli<3;pli++){
631     nsbs+=_dec->state.fplanes[pli].nsbs;
632     for(;sbi<nsbs;sbi++){
633       int quadi;
634       for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
635         int bi;
636         for(bi=0;bi<4;bi++){
637           ptrdiff_t fragi;
638           fragi=sb_maps[sbi][quadi][bi];
639           if(fragi>=0){
640             int coded;
641             if(sb_flags[sbi].coded_fully)coded=1;
642             else if(!sb_flags[sbi].coded_partially)coded=0;
643             else{
644               if(run_count<=0){
645                 run_count=oc_block_run_unpack(&_dec->opb);
646                 flag=!flag;
647               }
648               run_count--;
649               coded=flag;
650             }
651             if(coded)coded_fragis[ncoded_fragis++]=fragi;
652             else *(uncoded_fragis-++nuncoded_fragis)=fragi;
653             frags[fragi].coded=coded;
654             frags[fragi].refi=OC_FRAME_NONE;
655           }
656         }
657       }
658     }
659     _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
660     prev_ncoded_fragis=ncoded_fragis;
661   }
662   _dec->state.ntotal_coded_fragis=ncoded_fragis;
663   /*TODO: run_count should be 0 here.
664     If it's not, we should issue a warning of some kind.*/
665 }
666
667
/*The Huffman tree for the variable-length mode index code.
  oc_dec_mb_modes_unpack() uses it for every mode scheme except 7, and uses
   the decoded value to index the mode alphabet.
  NOTE(review): The entry layout (a bit count at the head of each node,
   negative leaves packed as -(code length<<8|value), and positive entries
   giving the offset of a child node) is inferred from the data; confirm
   against oc_huff_token_decode().
  Coding scheme:
   Codeword            Mode Index
   0                       0
   10                      1
   110                     2
   1110                    3
   11110                   4
   111110                  5
   1111110                 6
   1111111                 7*/
static const ogg_int16_t OC_VLC_MODE_TREE[26]={
  4,
   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
   -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
   -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
    3,
     -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
     -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
};
688
/*The Huffman tree for the constant-length mode index code: a single node in
   which each of the 8 mode indices is a 3-bit leaf.
  oc_dec_mb_modes_unpack() uses it when the mode scheme is 7.*/
static const ogg_int16_t OC_CLC_MODE_TREE[9]={
  3,
   -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
   -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
};
694
695 /*Unpacks the list of macro block modes for INTER frames.*/
696 static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
697   const oc_mb_map     *mb_maps;
698   signed char         *mb_modes;
699   const oc_fragment   *frags;
700   const unsigned char *alphabet;
701   unsigned char        scheme0_alphabet[8];
702   const ogg_int16_t   *mode_tree;
703   size_t               nmbs;
704   size_t               mbi;
705   long                 val;
706   int                  mode_scheme;
707   val=oc_pack_read(&_dec->opb,3);
708   mode_scheme=(int)val;
709   if(mode_scheme==0){
710     int mi;
711     /*Just in case, initialize the modes to something.
712       If the bitstream doesn't contain each index exactly once, it's likely
713        corrupt and the rest of the packet is garbage anyway, but this way we
714        won't crash, and we'll decode SOMETHING.*/
715     /*LOOP VECTORIZES*/
716     for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
717     for(mi=0;mi<OC_NMODES;mi++){
718       val=oc_pack_read(&_dec->opb,3);
719       scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
720     }
721     alphabet=scheme0_alphabet;
722   }
723   else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
724   mode_tree=mode_scheme==7?OC_CLC_MODE_TREE:OC_VLC_MODE_TREE;
725   mb_modes=_dec->state.mb_modes;
726   mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
727   nmbs=_dec->state.nmbs;
728   frags=_dec->state.frags;
729   for(mbi=0;mbi<nmbs;mbi++){
730     if(mb_modes[mbi]!=OC_MODE_INVALID){
731       /*Check for a coded luma block in this macro block.*/
732       if(frags[mb_maps[mbi][0][0]].coded
733        ||frags[mb_maps[mbi][0][1]].coded
734        ||frags[mb_maps[mbi][0][2]].coded
735        ||frags[mb_maps[mbi][0][3]].coded){
736         /*We found one, decode a mode.*/
737         mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
738       }
739       /*There were none: INTER_NOMV is forced.*/
740       else mb_modes[mbi]=OC_MODE_INTER_NOMV;
741     }
742   }
743 }
744
745
746
/*The Huffman tree for the variable-length motion vector component code.
  Leaf values are stored with a bias of 32, which oc_mv_unpack() subtracts
   again, giving components in the range -31...31.
  NOTE(review): As for the other trees, the node/leaf layout is consumed by
   oc_huff_token_decode(); confirm the details there.*/
static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
  5,
   -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
   -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
   -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
   -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
   -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
   33,          36,          39,          42,
   45,          50,          55,          60,
   65,          74,          83,          92,
    1,-(1<<8|32+4),-(1<<8|32-4),
    1,-(1<<8|32+5),-(1<<8|32-5),
    1,-(1<<8|32+6),-(1<<8|32-6),
    1,-(1<<8|32+7),-(1<<8|32-7),
    2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
    2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
    2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
    2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
    3,
     -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
     -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
    3,
     -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
     -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
    3,
     -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
     -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
    3,
     -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
     -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
};
778
/*The Huffman tree for the constant-length motion vector component code: a
   single node in which every leaf is a 6-bit code.
  Leaf values are stored with a bias of 32, which oc_mv_unpack() subtracts
   again.
  Both +0 and -0 appear, so the value 0 has two code words.*/
static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
  6,
   -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
   -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
   -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
   -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
   -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
   -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
   -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
   -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
   -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
   -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
   -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
   -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
   -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
   -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
   -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
   -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
};
798
799
800 static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
801   int dx;
802   int dy;
803   dx=oc_huff_token_decode(_opb,_tree)-32;
804   dy=oc_huff_token_decode(_opb,_tree)-32;
805   return OC_MV(dx,dy);
806 }
807
/*Unpacks the list of motion vectors for INTER frames, and propagates the macro
   block modes and motion vectors to the individual fragments.*/
810 static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
811   const oc_mb_map        *mb_maps;
812   const signed char      *mb_modes;
813   oc_set_chroma_mvs_func  set_chroma_mvs;
814   const ogg_int16_t      *mv_comp_tree;
815   oc_fragment            *frags;
816   oc_mv                  *frag_mvs;
817   const unsigned char    *map_idxs;
818   int                     map_nidxs;
819   oc_mv                   last_mv;
820   oc_mv                   prior_mv;
821   oc_mv                   cbmvs[4];
822   size_t                  nmbs;
823   size_t                  mbi;
824   long                    val;
825   set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
826   val=oc_pack_read1(&_dec->opb);
827   mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
828   map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
829   map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
830   prior_mv=last_mv=0;
831   frags=_dec->state.frags;
832   frag_mvs=_dec->state.frag_mvs;
833   mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
834   mb_modes=_dec->state.mb_modes;
835   nmbs=_dec->state.nmbs;
836   for(mbi=0;mbi<nmbs;mbi++){
837     int mb_mode;
838     mb_mode=mb_modes[mbi];
839     if(mb_mode!=OC_MODE_INVALID){
840       oc_mv     mbmv;
841       ptrdiff_t fragi;
842       int       coded[13];
843       int       codedi;
844       int       ncoded;
845       int       mapi;
846       int       mapii;
847       int       refi;
848       /*Search for at least one coded fragment.*/
849       ncoded=mapii=0;
850       do{
851         mapi=map_idxs[mapii];
852         fragi=mb_maps[mbi][mapi>>2][mapi&3];
853         if(frags[fragi].coded)coded[ncoded++]=mapi;
854       }
855       while(++mapii<map_nidxs);
856       if(ncoded<=0)continue;
857       refi=OC_FRAME_FOR_MODE(mb_mode);
858       switch(mb_mode){
859         case OC_MODE_INTER_MV_FOUR:{
860           oc_mv       lbmvs[4];
861           int         bi;
862           /*Mark the tail of the list, so we don't accidentally go past it.*/
863           coded[ncoded]=-1;
864           for(bi=codedi=0;bi<4;bi++){
865             if(coded[codedi]==bi){
866               codedi++;
867               fragi=mb_maps[mbi][0][bi];
868               frags[fragi].refi=refi;
869               frags[fragi].mb_mode=mb_mode;
870               lbmvs[bi]=oc_mv_unpack(&_dec->opb,mv_comp_tree);
871               frag_mvs[fragi]=lbmvs[bi];
872             }
873             else lbmvs[bi]=0;
874           }
875           if(codedi>0){
876             prior_mv=last_mv;
877             last_mv=lbmvs[coded[codedi-1]];
878           }
879           if(codedi<ncoded){
880             (*set_chroma_mvs)(cbmvs,lbmvs);
881             for(;codedi<ncoded;codedi++){
882               mapi=coded[codedi];
883               bi=mapi&3;
884               fragi=mb_maps[mbi][mapi>>2][bi];
885               frags[fragi].refi=refi;
886               frags[fragi].mb_mode=mb_mode;
887               frag_mvs[fragi]=cbmvs[bi];
888             }
889           }
890         }break;
891         case OC_MODE_INTER_MV:{
892           prior_mv=last_mv;
893           last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
894         }break;
895         case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
896         case OC_MODE_INTER_MV_LAST2:{
897           mbmv=prior_mv;
898           prior_mv=last_mv;
899           last_mv=mbmv;
900         }break;
901         case OC_MODE_GOLDEN_MV:{
902           mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
903         }break;
904         default:mbmv=0;break;
905       }
906       /*4MV mode fills in the fragments itself.
907         For all other modes we can use this common code.*/
908       if(mb_mode!=OC_MODE_INTER_MV_FOUR){
909         for(codedi=0;codedi<ncoded;codedi++){
910           mapi=coded[codedi];
911           fragi=mb_maps[mbi][mapi>>2][mapi&3];
912           frags[fragi].refi=refi;
913           frags[fragi].mb_mode=mb_mode;
914           frag_mvs[fragi]=mbmv;
915         }
916       }
917     }
918   }
919 }
920
921 static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
922   oc_fragment     *frags;
923   const ptrdiff_t *coded_fragis;
924   ptrdiff_t        ncoded_fragis;
925   ptrdiff_t        fragii;
926   ptrdiff_t        fragi;
927   ncoded_fragis=_dec->state.ntotal_coded_fragis;
928   if(ncoded_fragis<=0)return;
929   frags=_dec->state.frags;
930   coded_fragis=_dec->state.coded_fragis;
931   if(_dec->state.nqis==1){
932     /*If this frame has only a single qi value, then just use it for all coded
933        fragments.*/
934     for(fragii=0;fragii<ncoded_fragis;fragii++){
935       frags[coded_fragis[fragii]].qii=0;
936     }
937   }
938   else{
939     long val;
940     int  flag;
941     int  nqi1;
942     int  run_count;
943     /*Otherwise, we decode a qi index for each fragment, using two passes of
944       the same binary RLE scheme used for super-block coded bits.
945      The first pass marks each fragment as having a qii of 0 or greater than
946       0, and the second pass (if necessary), distinguishes between a qii of
947       1 and 2.
948      At first we just store the qii in the fragment.
949      After all the qii's are decoded, we make a final pass to replace them
950       with the corresponding qi's for this frame.*/
951     val=oc_pack_read1(&_dec->opb);
952     flag=(int)val;
953     nqi1=0;
954     fragii=0;
955     while(fragii<ncoded_fragis){
956       int full_run;
957       run_count=oc_sb_run_unpack(&_dec->opb);
958       full_run=run_count>=4129;
959       do{
960         frags[coded_fragis[fragii++]].qii=flag;
961         nqi1+=flag;
962       }
963       while(--run_count>0&&fragii<ncoded_fragis);
964       if(full_run&&fragii<ncoded_fragis){
965         val=oc_pack_read1(&_dec->opb);
966         flag=(int)val;
967       }
968       else flag=!flag;
969     }
970     /*TODO: run_count should be 0 here.
971       If it's not, we should issue a warning of some kind.*/
972     /*If we have 3 different qi's for this frame, and there was at least one
973        fragment with a non-zero qi, make the second pass.*/
974     if(_dec->state.nqis==3&&nqi1>0){
975       /*Skip qii==0 fragments.*/
976       for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++);
977       val=oc_pack_read1(&_dec->opb);
978       flag=(int)val;
979       do{
980         int full_run;
981         run_count=oc_sb_run_unpack(&_dec->opb);
982         full_run=run_count>=4129;
983         for(;fragii<ncoded_fragis;fragii++){
984           fragi=coded_fragis[fragii];
985           if(frags[fragi].qii==0)continue;
986           if(run_count--<=0)break;
987           frags[fragi].qii+=flag;
988         }
989         if(full_run&&fragii<ncoded_fragis){
990           val=oc_pack_read1(&_dec->opb);
991           flag=(int)val;
992         }
993         else flag=!flag;
994       }
995       while(fragii<ncoded_fragis);
996       /*TODO: run_count should be 0 here.
997         If it's not, we should issue a warning of some kind.*/
998     }
999   }
1000 }
1001
1002
1003
1004 /*Unpacks the DC coefficient tokens.
1005   Unlike when unpacking the AC coefficient tokens, we actually need to decode
1006    the DC coefficient values now so that we can do DC prediction.
1007   _huff_idx:   The index of the Huffman table to use for each color plane.
1008   _ntoks_left: The number of tokens left to be decoded in each color plane for
1009                 each coefficient.
1010                This is updated as EOB tokens and zero run tokens are decoded.
1011   Return: The length of any outstanding EOB run.*/
1012 static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
1013  ptrdiff_t _ntoks_left[3][64]){
1014   unsigned char   *dct_tokens;
1015   oc_fragment     *frags;
1016   const ptrdiff_t *coded_fragis;
1017   ptrdiff_t        ncoded_fragis;
1018   ptrdiff_t        fragii;
1019   ptrdiff_t        eobs;
1020   ptrdiff_t        ti;
1021   int              pli;
1022   dct_tokens=_dec->dct_tokens;
1023   frags=_dec->state.frags;
1024   coded_fragis=_dec->state.coded_fragis;
1025   ncoded_fragis=fragii=eobs=ti=0;
1026   for(pli=0;pli<3;pli++){
1027     ptrdiff_t run_counts[64];
1028     ptrdiff_t eob_count;
1029     ptrdiff_t eobi;
1030     int       rli;
1031     ncoded_fragis+=_dec->state.ncoded_fragis[pli];
1032     memset(run_counts,0,sizeof(run_counts));
1033     _dec->eob_runs[pli][0]=eobs;
1034     _dec->ti0[pli][0]=ti;
1035     /*Continue any previous EOB run, if there was one.*/
1036     eobi=eobs;
1037     if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
1038     eob_count=eobi;
1039     eobs-=eobi;
1040     while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1041     while(fragii<ncoded_fragis){
1042       int token;
1043       int cw;
1044       int eb;
1045       int skip;
1046       token=oc_huff_token_decode(&_dec->opb,
1047        _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1048       dct_tokens[ti++]=(unsigned char)token;
1049       if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1050         eb=(int)oc_pack_read(&_dec->opb,
1051          OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1052         dct_tokens[ti++]=(unsigned char)eb;
1053         if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1054         eb<<=OC_DCT_TOKEN_EB_POS(token);
1055       }
1056       else eb=0;
1057       cw=OC_DCT_CODE_WORD[token]+eb;
1058       eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1059       if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
1060       if(eobs){
1061         eobi=OC_MINI(eobs,ncoded_fragis-fragii);
1062         eob_count+=eobi;
1063         eobs-=eobi;
1064         while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1065       }
1066       else{
1067         int coeff;
1068         skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1069         cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1070         coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1071         if(skip)coeff=0;
1072         run_counts[skip]++;
1073         frags[coded_fragis[fragii++]].dc=coeff;
1074       }
1075     }
1076     /*Add the total EOB count to the longest run length.*/
1077     run_counts[63]+=eob_count;
1078     /*And convert the run_counts array to a moment table.*/
1079     for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1080     /*Finally, subtract off the number of coefficients that have been
1081        accounted for by runs started in this coefficient.*/
1082     for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
1083   }
1084   _dec->dct_tokens_count=ti;
1085   return eobs;
1086 }
1087
1088 /*Unpacks the AC coefficient tokens.
1089   This can completely discard coefficient values while unpacking, and so is
1090    somewhat simpler than unpacking the DC coefficient tokens.
1091   _huff_idx:   The index of the Huffman table to use for each color plane.
1092   _ntoks_left: The number of tokens left to be decoded in each color plane for
1093                 each coefficient.
1094                This is updated as EOB tokens and zero run tokens are decoded.
1095   _eobs:       The length of any outstanding EOB run from previous
1096                 coefficients.
1097   Return: The length of any outstanding EOB run.*/
1098 static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
1099  ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
1100   unsigned char *dct_tokens;
1101   ptrdiff_t      ti;
1102   int            pli;
1103   dct_tokens=_dec->dct_tokens;
1104   ti=_dec->dct_tokens_count;
1105   for(pli=0;pli<3;pli++){
1106     ptrdiff_t run_counts[64];
1107     ptrdiff_t eob_count;
1108     size_t    ntoks_left;
1109     size_t    ntoks;
1110     int       rli;
1111     _dec->eob_runs[pli][_zzi]=_eobs;
1112     _dec->ti0[pli][_zzi]=ti;
1113     ntoks_left=_ntoks_left[pli][_zzi];
1114     memset(run_counts,0,sizeof(run_counts));
1115     eob_count=0;
1116     ntoks=0;
1117     while(ntoks+_eobs<ntoks_left){
1118       int token;
1119       int cw;
1120       int eb;
1121       int skip;
1122       ntoks+=_eobs;
1123       eob_count+=_eobs;
1124       token=oc_huff_token_decode(&_dec->opb,
1125        _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1126       dct_tokens[ti++]=(unsigned char)token;
1127       if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1128         eb=(int)oc_pack_read(&_dec->opb,
1129          OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1130         dct_tokens[ti++]=(unsigned char)eb;
1131         if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1132         eb<<=OC_DCT_TOKEN_EB_POS(token);
1133       }
1134       else eb=0;
1135       cw=OC_DCT_CODE_WORD[token]+eb;
1136       skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1137       _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1138       if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
1139       if(_eobs==0){
1140         run_counts[skip]++;
1141         ntoks++;
1142       }
1143     }
1144     /*Add the portion of the last EOB run actually used by this coefficient.*/
1145     eob_count+=ntoks_left-ntoks;
1146     /*And remove it from the remaining EOB count.*/
1147     _eobs-=ntoks_left-ntoks;
1148     /*Add the total EOB count to the longest run length.*/
1149     run_counts[63]+=eob_count;
1150     /*And convert the run_counts array to a moment table.*/
1151     for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1152     /*Finally, subtract off the number of coefficients that have been
1153        accounted for by runs started in this coefficient.*/
1154     for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
1155   }
1156   _dec->dct_tokens_count=ti;
1157   return _eobs;
1158 }
1159
1160 /*Tokens describing the DCT coefficients that belong to each fragment are
1161    stored in the bitstream grouped by coefficient, not by fragment.
1162
1163   This means that we either decode all the tokens in order, building up a
1164    separate coefficient list for each fragment as we go, and then go back and
1165    do the iDCT on each fragment, or we have to create separate lists of tokens
1166    for each coefficient, so that we can pull the next token required off the
1167    head of the appropriate list when decoding a specific fragment.
1168
1169   The former was VP3's choice, and it meant 2*w*h extra storage for all the
1170    decoded coefficient values.
1171
1172   We take the second option, which lets us store just one to three bytes per
1173    token (generally far fewer than the number of coefficients, due to EOB
1174    tokens and zero runs), and which requires us to only maintain a counter for
1175    each of the 64 coefficients, instead of a counter for every fragment to
1176    determine where the next token goes.
1177
1178   We actually use 3 counters per coefficient, one for each color plane, so we
1179    can decode all color planes simultaneously.
1180   This lets color conversion, etc., be done as soon as a full MCU (one or
1181    two super block rows) is decoded, while the image data is still in cache.*/
1182
1183 static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
1184   static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
1185   ptrdiff_t  ntoks_left[3][64];
1186   int        huff_idxs[2];
1187   ptrdiff_t  eobs;
1188   long       val;
1189   int        pli;
1190   int        zzi;
1191   int        hgi;
1192   for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
1193     ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
1194   }
1195   val=oc_pack_read(&_dec->opb,4);
1196   huff_idxs[0]=(int)val;
1197   val=oc_pack_read(&_dec->opb,4);
1198   huff_idxs[1]=(int)val;
1199   _dec->eob_runs[0][0]=0;
1200   eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
1201 #if defined(HAVE_CAIRO)
1202   _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
1203 #endif
1204   val=oc_pack_read(&_dec->opb,4);
1205   huff_idxs[0]=(int)val;
1206   val=oc_pack_read(&_dec->opb,4);
1207   huff_idxs[1]=(int)val;
1208   zzi=1;
1209   for(hgi=1;hgi<5;hgi++){
1210     huff_idxs[0]+=16;
1211     huff_idxs[1]+=16;
1212     for(;zzi<OC_HUFF_LIST_MAX[hgi];zzi++){
1213       eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
1214     }
1215   }
1216   /*TODO: eobs should be exactly zero, or 4096 or greater.
1217     The second case occurs when an EOB run of size zero is encountered, which
1218      gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
1219     If neither of these conditions holds, then a warning should be issued.*/
1220 }
1221
1222
1223 static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
1224   /*pp_level 0: disabled; free any memory used and return*/
1225   if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
1226     if(_dec->dc_qis!=NULL){
1227       _ogg_free(_dec->dc_qis);
1228       _dec->dc_qis=NULL;
1229       _ogg_free(_dec->variances);
1230       _dec->variances=NULL;
1231       _ogg_free(_dec->pp_frame_data);
1232       _dec->pp_frame_data=NULL;
1233     }
1234     return 1;
1235   }
1236   if(_dec->dc_qis==NULL){
1237     /*If we haven't been tracking DC quantization indices, there's no point in
1238        starting now.*/
1239     if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
1240     _dec->dc_qis=(unsigned char *)_ogg_malloc(
1241      _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
1242     if(_dec->dc_qis==NULL)return 1;
1243     memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
1244   }
1245   else{
1246     unsigned char   *dc_qis;
1247     const ptrdiff_t *coded_fragis;
1248     ptrdiff_t        ncoded_fragis;
1249     ptrdiff_t        fragii;
1250     unsigned char    qi0;
1251     /*Update the DC quantization index of each coded block.*/
1252     dc_qis=_dec->dc_qis;
1253     coded_fragis=_dec->state.coded_fragis;
1254     ncoded_fragis=_dec->state.ncoded_fragis[0]+
1255      _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
1256     qi0=(unsigned char)_dec->state.qis[0];
1257     for(fragii=0;fragii<ncoded_fragis;fragii++){
1258       dc_qis[coded_fragis[fragii]]=qi0;
1259     }
1260   }
1261   /*pp_level 1: Stop after updating DC quantization indices.*/
1262   if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
1263     if(_dec->variances!=NULL){
1264       _ogg_free(_dec->variances);
1265       _dec->variances=NULL;
1266       _ogg_free(_dec->pp_frame_data);
1267       _dec->pp_frame_data=NULL;
1268     }
1269     return 1;
1270   }
1271   if(_dec->variances==NULL){
1272     size_t frame_sz;
1273     size_t c_sz;
1274     int    c_w;
1275     int    c_h;
1276     frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
1277     c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
1278     c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
1279     c_sz=c_w*(size_t)c_h;
1280     /*Allocate space for the chroma planes, even if we're not going to use
1281        them; this simplifies allocation state management, though it may waste
1282        memory on the few systems that don't overcommit pages.*/
1283     frame_sz+=c_sz<<1;
1284     _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
1285      frame_sz*sizeof(_dec->pp_frame_data[0]));
1286     _dec->variances=(int *)_ogg_malloc(
1287      _dec->state.nfrags*sizeof(_dec->variances[0]));
1288     if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
1289       _ogg_free(_dec->pp_frame_data);
1290       _dec->pp_frame_data=NULL;
1291       _ogg_free(_dec->variances);
1292       _dec->variances=NULL;
1293       return 1;
1294     }
1295     /*Force an update of the PP buffer pointers.*/
1296     _dec->pp_frame_state=0;
1297   }
1298   /*Update the PP buffer pointers if necessary.*/
1299   if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
1300     if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
1301       /*If chroma processing is disabled, just use the PP luma plane.*/
1302       _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
1303       _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
1304       _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
1305       _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
1306        (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
1307     }
1308     else{
1309       size_t y_sz;
1310       size_t c_sz;
1311       int    c_w;
1312       int    c_h;
1313       /*Otherwise, set up pointers to all three PP planes.*/
1314       y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
1315       c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
1316       c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
1317       c_sz=c_w*(size_t)c_h;
1318       _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
1319       _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
1320       _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
1321       _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
1322       _dec->pp_frame_buf[1].width=c_w;
1323       _dec->pp_frame_buf[1].height=c_h;
1324       _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
1325       _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
1326       _dec->pp_frame_buf[2].width=c_w;
1327       _dec->pp_frame_buf[2].height=c_h;
1328       _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
1329       _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
1330       oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
1331     }
1332     _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
1333   }
1334   /*If we're not processing chroma, copy the reference frame's chroma planes.*/
1335   if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
1336     memcpy(_dec->pp_frame_buf+1,
1337      _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
1338      sizeof(_dec->pp_frame_buf[1])*2);
1339   }
1340   return 0;
1341 }
1342
1343
1344 /*Initialize the main decoding pipeline.*/
1345 static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
1346  oc_dec_pipeline_state *_pipe){
1347   const ptrdiff_t *coded_fragis;
1348   const ptrdiff_t *uncoded_fragis;
1349   int              flimit;
1350   int              pli;
1351   int              qii;
1352   int              qti;
1353   int              zzi;
1354   /*If chroma is sub-sampled in the vertical direction, we have to decode two
1355      super block rows of Y' for each super block row of Cb and Cr.*/
1356   _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
1357   /*Initialize the token and extra bits indices for each plane and
1358      coefficient.*/
1359   memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
1360   /*Also copy over the initial the EOB run counts.*/
1361   memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
1362   /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
1363   coded_fragis=_dec->state.coded_fragis;
1364   uncoded_fragis=coded_fragis+_dec->state.nfrags;
1365   for(pli=0;pli<3;pli++){
1366     ptrdiff_t ncoded_fragis;
1367     _pipe->coded_fragis[pli]=coded_fragis;
1368     _pipe->uncoded_fragis[pli]=uncoded_fragis;
1369     ncoded_fragis=_dec->state.ncoded_fragis[pli];
1370     coded_fragis+=ncoded_fragis;
1371     uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
1372   }
1373   /*Set up condensed quantizer tables.*/
1374   for(pli=0;pli<3;pli++){
1375     for(qii=0;qii<_dec->state.nqis;qii++){
1376       for(qti=0;qti<2;qti++){
1377         _pipe->dequant[pli][qii][qti]=
1378          _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
1379       }
1380     }
1381   }
1382   /*Set the previous DC predictor to 0 for all color planes and frame types.*/
1383   memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
1384   /*Initialize the bounding value array for the loop filter.*/
1385   flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
1386   _pipe->loop_filter=flimit!=0;
1387   if(flimit!=0)oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
1388   /*Initialize any buffers needed for post-processing.
1389     We also save the current post-processing level, to guard against the user
1390      changing it from a callback.*/
1391   if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
1392   /*If we don't have enough information to post-process, disable it, regardless
1393      of the user-requested level.*/
1394   else{
1395     _pipe->pp_level=OC_PP_LEVEL_DISABLED;
1396     memcpy(_dec->pp_frame_buf,
1397      _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
1398      sizeof(_dec->pp_frame_buf[0])*3);
1399   }
1400   /*Clear down the DCT coefficient buffer for the first block.*/
1401   for(zzi=0;zzi<64;zzi++)_pipe->dct_coeffs[zzi]=0;
1402 }
1403
1404 /*Undo the DC prediction in a single plane of an MCU (one or two super block
1405    rows).
1406   As a side effect, the number of coded and uncoded fragments in this plane of
1407    the MCU is also computed.*/
1408 void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
1409  oc_dec_pipeline_state *_pipe,int _pli){
1410   const oc_fragment_plane *fplane;
1411   oc_fragment             *frags;
1412   int                     *pred_last;
1413   ptrdiff_t                ncoded_fragis;
1414   ptrdiff_t                fragi;
1415   int                      fragx;
1416   int                      fragy;
1417   int                      fragy0;
1418   int                      fragy_end;
1419   int                      nhfrags;
1420   /*Compute the first and last fragment row of the current MCU for this
1421      plane.*/
1422   fplane=_dec->state.fplanes+_pli;
1423   fragy0=_pipe->fragy0[_pli];
1424   fragy_end=_pipe->fragy_end[_pli];
1425   nhfrags=fplane->nhfrags;
1426   pred_last=_pipe->pred_last[_pli];
1427   frags=_dec->state.frags;
1428   ncoded_fragis=0;
1429   fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
1430   for(fragy=fragy0;fragy<fragy_end;fragy++){
1431     if(fragy==0){
1432       /*For the first row, all of the cases reduce to just using the previous
1433          predictor for the same reference frame.*/
1434       for(fragx=0;fragx<nhfrags;fragx++,fragi++){
1435         if(frags[fragi].coded){
1436           int refi;
1437           refi=frags[fragi].refi;
1438           pred_last[refi]=frags[fragi].dc+=pred_last[refi];
1439           ncoded_fragis++;
1440         }
1441       }
1442     }
1443     else{
1444       oc_fragment *u_frags;
1445       int          l_ref;
1446       int          ul_ref;
1447       int          u_ref;
1448       u_frags=frags-nhfrags;
1449       l_ref=-1;
1450       ul_ref=-1;
1451       u_ref=u_frags[fragi].refi;
1452       for(fragx=0;fragx<nhfrags;fragx++,fragi++){
1453         int ur_ref;
1454         if(fragx+1>=nhfrags)ur_ref=-1;
1455         else ur_ref=u_frags[fragi+1].refi;
1456         if(frags[fragi].coded){
1457           int pred;
1458           int refi;
1459           refi=frags[fragi].refi;
1460           /*We break out a separate case based on which of our neighbors use
1461              the same reference frames.
1462             This is somewhat faster than trying to make a generic case which
1463              handles all of them, since it reduces lots of poorly predicted
1464              jumps to one switch statement, and also lets a number of the
1465              multiplications be optimized out by strength reduction.*/
1466           switch((l_ref==refi)|(ul_ref==refi)<<1|
1467            (u_ref==refi)<<2|(ur_ref==refi)<<3){
1468             default:pred=pred_last[refi];break;
1469             case  1:
1470             case  3:pred=frags[fragi-1].dc;break;
1471             case  2:pred=u_frags[fragi-1].dc;break;
1472             case  4:
1473             case  6:
1474             case 12:pred=u_frags[fragi].dc;break;
1475             case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
1476             case  8:pred=u_frags[fragi+1].dc;break;
1477             case  9:
1478             case 11:
1479             case 13:{
1480               /*The TI compiler mis-compiles this line.*/
1481               pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
1482             }break;
1483             case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
1484             case 14:{
1485               pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
1486                +10*u_frags[fragi].dc)/16;
1487             }break;
1488             case  7:
1489             case 15:{
1490               int p0;
1491               int p1;
1492               int p2;
1493               p0=frags[fragi-1].dc;
1494               p1=u_frags[fragi-1].dc;
1495               p2=u_frags[fragi].dc;
1496               pred=(29*(p0+p2)-26*p1)/32;
1497               if(abs(pred-p2)>128)pred=p2;
1498               else if(abs(pred-p0)>128)pred=p0;
1499               else if(abs(pred-p1)>128)pred=p1;
1500             }break;
1501           }
1502           pred_last[refi]=frags[fragi].dc+=pred;
1503           ncoded_fragis++;
1504           l_ref=refi;
1505         }
1506         else l_ref=-1;
1507         ul_ref=u_ref;
1508         u_ref=ur_ref;
1509       }
1510     }
1511   }
1512   _pipe->ncoded_fragis[_pli]=ncoded_fragis;
1513   /*Also save the number of uncoded fragments so we know how many to copy.*/
1514   _pipe->nuncoded_fragis[_pli]=
1515    (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
1516 }
1517
1518 /*Reconstructs all coded fragments in a single MCU (one or two super block
1519    rows).
1520   This requires that each coded fragment have a proper macro block mode and
1521    motion vector (if not in INTRA mode), and have its DC value decoded, with
1522    the DC prediction process reversed, and the number of coded and uncoded
1523    fragments in this plane of the MCU be counted.
1524   The token lists for each color plane and coefficient should also be filled
1525    in, along with initial token offsets, extra bits offsets, and EOB run
1526    counts.*/
1527 static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
1528  oc_dec_pipeline_state *_pipe,int _pli){
1529   unsigned char       *dct_tokens;
1530   const unsigned char *dct_fzig_zag;
1531   ogg_uint16_t         dc_quant[2];
1532   const oc_fragment   *frags;
1533   const ptrdiff_t     *coded_fragis;
1534   ptrdiff_t            ncoded_fragis;
1535   ptrdiff_t            fragii;
1536   ptrdiff_t           *ti;
1537   ptrdiff_t           *eob_runs;
1538   int                  qti;
1539   dct_tokens=_dec->dct_tokens;
1540   dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
1541   frags=_dec->state.frags;
1542   coded_fragis=_pipe->coded_fragis[_pli];
1543   ncoded_fragis=_pipe->ncoded_fragis[_pli];
1544   ti=_pipe->ti[_pli];
1545   eob_runs=_pipe->eob_runs[_pli];
1546   for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
1547   for(fragii=0;fragii<ncoded_fragis;fragii++){
1548     const ogg_uint16_t *ac_quant;
1549     ptrdiff_t           fragi;
1550     int                 last_zzi;
1551     int                 zzi;
1552     fragi=coded_fragis[fragii];
1553     qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
1554     ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
1555     /*Decode the AC coefficients.*/
1556     for(zzi=0;zzi<64;){
1557       int token;
1558       last_zzi=zzi;
1559       if(eob_runs[zzi]){
1560         eob_runs[zzi]--;
1561         break;
1562       }
1563       else{
1564         ptrdiff_t eob;
1565         int       cw;
1566         int       rlen;
1567         int       coeff;
1568         int       lti;
1569         lti=ti[zzi];
1570         token=dct_tokens[lti++];
1571         cw=OC_DCT_CODE_WORD[token];
1572         /*These parts could be done branchless, but the branches are fairly
1573            predictable and the C code translates into more than a few
1574            instructions, so it's worth it to avoid them.*/
1575         if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1576           cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
1577         }
1578         eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1579         if(token==OC_DCT_TOKEN_FAT_EOB){
1580           eob+=dct_tokens[lti++]<<8;
1581           if(eob==0)eob=OC_DCT_EOB_FINISH;
1582         }
1583         rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1584         cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1585         coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1586         eob_runs[zzi]=eob;
1587         ti[zzi]=lti;
1588         zzi+=rlen;
1589         _pipe->dct_coeffs[dct_fzig_zag[zzi]]=
1590          (ogg_int16_t)(coeff*(int)ac_quant[zzi]);
1591         zzi+=!eob;
1592       }
1593     }
1594     /*TODO: zzi should be exactly 64 here.
1595       If it's not, we should report some kind of warning.*/
1596     zzi=OC_MINI(zzi,64);
1597     _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
1598     /*last_zzi is always initialized.
1599       If your compiler thinks otherwise, it is dumb.*/
1600     oc_state_frag_recon(&_dec->state,fragi,_pli,
1601      _pipe->dct_coeffs,last_zzi,dc_quant[qti]);
1602   }
1603   _pipe->coded_fragis[_pli]+=ncoded_fragis;
1604   /*Right now the reconstructed MCU has only the coded blocks in it.*/
1605   /*TODO: We make the decision here to always copy the uncoded blocks into it
1606      from the reference frame.
1607     We could also copy the coded blocks back over the reference frame, if we
1608      wait for an additional MCU to be decoded, which might be faster if only a
1609      small number of blocks are coded.
1610     However, this introduces more latency, creating a larger cache footprint.
1611     It's unknown which decision is better, but this one results in simpler
1612      code, and the hard case (high bitrate, high resolution) is handled
1613      correctly.*/
1614   /*Copy the uncoded blocks from the previous reference frame.*/
1615   if(_pipe->nuncoded_fragis[_pli]>0){
1616     _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
1617     oc_frag_copy_list(&_dec->state,
1618      _dec->state.ref_frame_data[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
1619      _dec->state.ref_frame_data[_dec->state.ref_frame_idx[OC_FRAME_PREV]],
1620      _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
1621      _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
1622   }
1623 }
1624
/*Filter a horizontal block edge.
  Each of the 8 columns is handled independently: 10 vertically consecutive
   source pixels are gathered, and the middle 8 of them are stored to the
   destination, either smoothed or unchanged.
  _dst:         The first of the 8 destination rows.
  _dst_ystride: The offset between consecutive destination rows.
  _src:         The first of the 10 source rows.
  _src_ystride: The offset between consecutive source rows.
  _qstep:       The step across the edge (between rows 4 and 5) must be
                 strictly smaller than this for the column to be smoothed.
  _flimit:      The activity on each side of the edge must be strictly
                 smaller than this for the column to be smoothed.
  _variance0:   Receives the activity of rows 0...4, summed over all columns.
  _variance1:   Receives the activity of rows 5...9, summed over all columns.
  The activity added for a single column is never larger than 255.*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  int col;
  for(col=0;col<8;col++){
    int p[10];
    int q[8];
    int act0;
    int act1;
    int step;
    int i;
    /*Gather the whole column before anything is stored.*/
    for(i=0;i<10;i++)p[i]=_src[col+i*_src_ystride];
    /*Measure the activity (sum of absolute differences) on each side.*/
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    *_variance0+=act0<255?act0:255;
    *_variance1+=act1<255?act1:255;
    step=p[5]-p[4];
    if(act0<_flimit&&act1<_flimit&&step<_qstep&&-step<_qstep){
      /*Both sides are flat and the edge is small: low-pass across it.
        All of the kernels have weights that sum to 8.*/
      q[0]=p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4>>3;
      q[1]=p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4>>3;
      for(i=0;i<4;i++){
        q[i+2]=p[i]+p[i+1]+p[i+2]+p[i+3]*2+p[i+4]+p[i+5]+p[i+6]+4>>3;
      }
      q[6]=p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4>>3;
      q[7]=p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4>>3;
    }
    else{
      /*Otherwise the middle 8 pixels pass through untouched.*/
      for(i=0;i<8;i++)q[i]=p[i+1];
    }
    for(i=0;i<8;i++)_dst[col+i*_dst_ystride]=(unsigned char)q[i];
  }
}
1678
/*Filter a vertical block edge.
  Each of the 8 rows is handled independently and in place: 10 horizontally
   consecutive pixels are gathered, starting one pixel to the left of _dst,
   and the 8 pixels starting at _dst are overwritten only if the row is
   smoothed.
  _dst:         The first pixel that may be modified in the first row.
  _dst_ystride: The offset between consecutive rows.
  _qstep:       The step across the edge (between pixels 4 and 5 of the 10)
                 must be strictly smaller than this for the row to be
                 smoothed.
  _flimit:      The activity on each side of the edge must be strictly
                 smaller than this for the row to be smoothed.
  _variances:   _variances[0] receives the activity left of the edge and
                 _variances[1] the activity right of it, summed over all rows.
  The activity added for a single row is never larger than 255.*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  int line;
  for(line=0;line<8;line++){
    unsigned char *px;
    int            p[10];
    int            act0;
    int            act1;
    int            step;
    int            i;
    px=_dst+line*_dst_ystride;
    /*The window starts one pixel before the first output pixel.*/
    for(i=0;i<10;i++)p[i]=px[i-1];
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    _variances[0]+=act0<255?act0:255;
    _variances[1]+=act1<255?act1:255;
    step=p[5]-p[4];
    /*Rows that fail the test are left exactly as they were.*/
    if(act0>=_flimit||act1>=_flimit||step>=_qstep||-step>=_qstep)continue;
    px[0]=(unsigned char)(p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4>>3);
    px[1]=(unsigned char)(p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4>>3);
    for(i=0;i<4;i++){
      px[i+2]=(unsigned char)(p[i]+p[i+1]+p[i+2]+p[i+3]*2+
       p[i+4]+p[i+5]+p[i+6]+4>>3);
    }
    px[6]=(unsigned char)(p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4>>3);
    px[7]=(unsigned char)(p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4>>3);
  }
}
1715
1716 static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
1717  th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
1718  int _fragy_end){
1719   oc_fragment_plane   *fplane;
1720   int                 *variance;
1721   unsigned char       *dc_qi;
1722   unsigned char       *dst;
1723   const unsigned char *src;
1724   ptrdiff_t            froffset;
1725   int                  dst_ystride;
1726   int                  src_ystride;
1727   int                  nhfrags;
1728   int                  width;
1729   int                  notstart;
1730   int                  notdone;
1731   int                  flimit;
1732   int                  qstep;
1733   int                  y_end;
1734   int                  y;
1735   int                  x;
1736   _dst+=_pli;
1737   _src+=_pli;
1738   fplane=_dec->state.fplanes+_pli;
1739   nhfrags=fplane->nhfrags;
1740   froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
1741   variance=_dec->variances+froffset;
1742   dc_qi=_dec->dc_qis+froffset;
1743   notstart=_fragy0>0;
1744   notdone=_fragy_end<fplane->nvfrags;
1745   /*We want to clear an extra row of variances, except at the end.*/
1746   memset(variance+(nhfrags&-notstart),0,
1747    (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
1748   /*Except for the first time, we want to point to the middle of the row.*/
1749   y=(_fragy0<<3)+(notstart<<2);
1750   dst_ystride=_dst->stride;
1751   src_ystride=_src->stride;
1752   dst=_dst->data+y*(ptrdiff_t)dst_ystride;
1753   src=_src->data+y*(ptrdiff_t)src_ystride;
1754   width=_dst->width;
1755   for(;y<4;y++){
1756     memcpy(dst,src,width*sizeof(dst[0]));
1757     dst+=dst_ystride;
1758     src+=src_ystride;
1759   }
1760   /*We also want to skip the last row in the frame for this loop.*/
1761   y_end=_fragy_end-!notdone<<3;
1762   for(;y<y_end;y+=8){
1763     qstep=_dec->pp_dc_scale[*dc_qi];
1764     flimit=(qstep*3)>>2;
1765     oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
1766      qstep,flimit,variance,variance+nhfrags);
1767     variance++;
1768     dc_qi++;
1769     for(x=8;x<width;x+=8){
1770       qstep=_dec->pp_dc_scale[*dc_qi];
1771       flimit=(qstep*3)>>2;
1772       oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
1773        qstep,flimit,variance,variance+nhfrags);
1774       oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
1775        qstep,flimit,variance-1);
1776       variance++;
1777       dc_qi++;
1778     }
1779     dst+=dst_ystride<<3;
1780     src+=src_ystride<<3;
1781   }
1782   /*And finally, handle the last row in the frame, if it's in the range.*/
1783   if(!notdone){
1784     int height;
1785     height=_dst->height;
1786     for(;y<height;y++){
1787       memcpy(dst,src,width*sizeof(dst[0]));
1788       dst+=dst_ystride;
1789       src+=src_ystride;
1790     }
1791     /*Filter the last row of vertical block edges.*/
1792     dc_qi++;
1793     for(x=8;x<width;x+=8){
1794       qstep=_dec->pp_dc_scale[*dc_qi++];
1795       flimit=(qstep*3)>>2;
1796       oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
1797        qstep,flimit,variance++);
1798     }
1799   }
1800 }
1801
/*Converts the difference between two neighboring pixels into a deringing
   filter weight.
  _diff:      The (signed) difference between the two pixels.
  _dc_scale:  The quantizer-dependent offset added to the weight.
  _shift:     How far the absolute difference is shifted up before it is
               subtracted.
  _sharp_mod: The weight to use when the raw weight falls below -64.
  _mod_hi:    The largest weight to return otherwise.
  Return: _sharp_mod, or a weight limited to the range 0..._mod_hi (the lower
   limit wins if _mod_hi is negative).*/
static int oc_dering_mod_weight(int _diff,int _dc_scale,int _shift,
 int _sharp_mod,int _mod_hi){
  int mod;
  mod=32+_dc_scale-(abs(_diff)<<_shift);
  if(mod<-64)return _sharp_mod;
  if(mod>_mod_hi)mod=_mod_hi;
  return mod<0?0:mod;
}

/*Derings a single 8x8 block in place.
  A weight is first computed for every pair of vertically and horizontally
   adjacent pixels, including the pairs that straddle the block boundary.
  Each pixel is then replaced by a weighted average of itself and its four
   neighbors.
  The pixels are updated in raster order, so the left and upper neighbors have
   already been filtered by the time they are used.
  _idata:     The top-left pixel of the block.
  _ystride:   The offset between consecutive rows.
  _b:         Flags for the sides of the block that have no neighboring
               pixels: 1 for the left, 2 for the right, 4 for the top, and 8
               for the bottom.
              Pixels on a flagged side use themselves as the missing
               neighbor.
  _dc_scale:  The quantizer-dependent offset for the weights.
  _sharp_mod: The weight to use across very strong edges.
  _strong:    Selects the stronger (1) or weaker (0) filter variant.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  static const unsigned char OC_MOD_MAX[2]={24,32};
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *cur;
  const unsigned char *ref;
  int                  vmod[72];
  int                  hmod[72];
  int                  shift;
  int                  mod_hi;
  int                  lstep;
  int                  rstep;
  int                  ustep;
  int                  dstep;
  int                  y;
  int                  x;
  shift=OC_MOD_SHIFT[_strong];
  mod_hi=3*_dc_scale;
  if(mod_hi>OC_MOD_MAX[_strong])mod_hi=OC_MOD_MAX[_strong];
  /*How far we may step outside the block in each direction (0 or 1).*/
  lstep=!(_b&1);
  rstep=!(_b&2);
  ustep=!(_b&4);
  dstep=!(_b&8);
  /*vmod[8*y+x] is the weight between row y-1 and row y in column x.
    Rows -1 and 8 lie outside the block, and collapse onto rows 0 and 7 when
     the corresponding side is flagged.*/
  for(y=0;y<9;y++){
    cur=_idata+(y<8?y:7+dstep)*_ystride;
    ref=_idata+(y>0?y-1:-ustep)*_ystride;
    for(x=0;x<8;x++){
      vmod[(y<<3)+x]=oc_dering_mod_weight(cur[x]-ref[x],
       _dc_scale,shift,_sharp_mod,mod_hi);
    }
  }
  /*hmod[8*x+y] is the weight between column x-1 and column x in row y.*/
  for(x=0;x<9;x++){
    cur=_idata+(x<8?x:7+rstep);
    ref=_idata+(x>0?x-1:-lstep);
    for(y=0;y<8;y++){
      hmod[(x<<3)+y]=oc_dering_mod_weight(cur[y*_ystride]-ref[y*_ystride],
       _dc_scale,shift,_sharp_mod,mod_hi);
    }
  }
  /*All of the weights come from the unfiltered block, but the pixels
     themselves are now overwritten as we go.*/
  for(y=0;y<8;y++){
    const unsigned char *above;
    const unsigned char *below;
    unsigned char       *row;
    row=_idata+y*_ystride;
    above=_idata+(y>0?y-1:-ustep)*_ystride;
    below=_idata+(y<7?y+1:7+dstep)*_ystride;
    for(x=0;x<8;x++){
      int wsum;
      int acc;
      int w;
      int v;
      wsum=0;
      /*This is the rounding offset for the final shift.*/
      acc=64;
      w=hmod[(x<<3)+y];
      wsum+=w;
      acc+=w*row[x>0?x-1:-lstep];
      w=vmod[(y<<3)+x];
      wsum+=w;
      acc+=w*above[x];
      w=vmod[(y+1<<3)+x];
      wsum+=w;
      acc+=w*below[x];
      w=hmod[(x+1<<3)+y];
      wsum+=w;
      acc+=w*row[x<7?x+1:7+rstep];
      /*The center pixel receives whatever weight is left out of 128.*/
      v=(128-wsum)*row[x]+acc>>7;
      row[x]=(unsigned char)(v<0?0:v>255?255:v);
    }
  }
}
1902
/*The fragment variance thresholds tested by oc_dec_dering_frag_rows().
  All of them are multiples of the first.*/
/*Above this, a fragment is deringed with the weaker filter.*/
#define OC_DERING_THRESH1 (384)
/*Above this, a fragment is deringed with the stronger filter.*/
#define OC_DERING_THRESH2 (OC_DERING_THRESH1*4)
/*Above this, a luma fragment gets the stronger deringing level's treatment
   (when that level is enabled).*/
#define OC_DERING_THRESH3 (OC_DERING_THRESH1*5)
/*The same, for chroma fragments; also the threshold tested against the
   neighbors of a luma fragment.*/
#define OC_DERING_THRESH4 (OC_DERING_THRESH1*10)
1907
1908 static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
1909  int _pli,int _fragy0,int _fragy_end){
1910   th_img_plane      *iplane;
1911   oc_fragment_plane *fplane;
1912   oc_fragment       *frag;
1913   int               *variance;
1914   unsigned char     *idata;
1915   ptrdiff_t          froffset;
1916   int                ystride;
1917   int                nhfrags;
1918   int                sthresh;
1919   int                strong;
1920   int                y_end;
1921   int                width;
1922   int                height;
1923   int                y;
1924   int                x;
1925   iplane=_img+_pli;
1926   fplane=_dec->state.fplanes+_pli;
1927   nhfrags=fplane->nhfrags;
1928   froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
1929   variance=_dec->variances+froffset;
1930   frag=_dec->state.frags+froffset;
1931   strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
1932   sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
1933   y=_fragy0<<3;
1934   ystride=iplane->stride;
1935   idata=iplane->data+y*(ptrdiff_t)ystride;
1936   y_end=_fragy_end<<3;
1937   width=iplane->width;
1938   height=iplane->height;
1939   for(;y<y_end;y+=8){
1940     for(x=0;x<width;x+=8){
1941       int b;
1942       int qi;
1943       int var;
1944       qi=_dec->state.qis[frag->qii];
1945       var=*variance;
1946       b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3;
1947       if(strong&&var>sthresh){
1948         oc_dering_block(idata+x,ystride,b,
1949          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1950         if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
1951          !(b&2)&&variance[1]>OC_DERING_THRESH4||
1952          !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
1953          !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){
1954           oc_dering_block(idata+x,ystride,b,
1955            _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1956           oc_dering_block(idata+x,ystride,b,
1957            _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1958         }
1959       }
1960       else if(var>OC_DERING_THRESH2){
1961         oc_dering_block(idata+x,ystride,b,
1962          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1963       }
1964       else if(var>OC_DERING_THRESH1){
1965         oc_dering_block(idata+x,ystride,b,
1966          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
1967       }
1968       frag++;
1969       variance++;
1970     }
1971     idata+=ystride<<3;
1972   }
1973 }
1974
1975
1976
1977 th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
1978   oc_dec_ctx *dec;
1979   if(_info==NULL||_setup==NULL)return NULL;
1980   dec=oc_aligned_malloc(sizeof(*dec),16);
1981   if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
1982     oc_aligned_free(dec);
1983     return NULL;
1984   }
1985   dec->state.curframe_num=0;
1986   return dec;
1987 }
1988
1989 void th_decode_free(th_dec_ctx *_dec){
1990   if(_dec!=NULL){
1991     oc_dec_clear(_dec);
1992     oc_aligned_free(_dec);
1993   }
1994 }
1995
1996 int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
1997  size_t _buf_sz){
1998   switch(_req){
1999   case TH_DECCTL_GET_PPLEVEL_MAX:{
2000     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2001     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2002     (*(int *)_buf)=OC_PP_LEVEL_MAX;
2003     return 0;
2004   }break;
2005   case TH_DECCTL_SET_PPLEVEL:{
2006     int pp_level;
2007     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2008     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2009     pp_level=*(int *)_buf;
2010     if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
2011     _dec->pp_level=pp_level;
2012     return 0;
2013   }break;
2014   case TH_DECCTL_SET_GRANPOS:{
2015     ogg_int64_t granpos;
2016     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2017     if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
2018     granpos=*(ogg_int64_t *)_buf;
2019     if(granpos<0)return TH_EINVAL;
2020     _dec->state.granpos=granpos;
2021     _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
2022      -_dec->state.granpos_bias;
2023     _dec->state.curframe_num=_dec->state.keyframe_num
2024      +(granpos&(1<<_dec->state.info.keyframe_granule_shift)-1);
2025     return 0;
2026   }break;
2027   case TH_DECCTL_SET_STRIPE_CB:{
2028     th_stripe_callback *cb;
2029     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2030     if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
2031     cb=(th_stripe_callback *)_buf;
2032     _dec->stripe_cb.ctx=cb->ctx;
2033     _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
2034     return 0;
2035   }break;
2036 #ifdef HAVE_CAIRO
2037   case TH_DECCTL_SET_TELEMETRY_MBMODE:{
2038     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2039     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2040     _dec->telemetry=1;
2041     _dec->telemetry_mbmode=*(int *)_buf;
2042     return 0;
2043   }break;
2044   case TH_DECCTL_SET_TELEMETRY_MV:{
2045     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2046     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2047     _dec->telemetry=1;
2048     _dec->telemetry_mv=*(int *)_buf;
2049     return 0;
2050   }break;
2051   case TH_DECCTL_SET_TELEMETRY_QI:{
2052     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2053     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2054     _dec->telemetry=1;
2055     _dec->telemetry_qi=*(int *)_buf;
2056     return 0;
2057   }break;
2058   case TH_DECCTL_SET_TELEMETRY_BITS:{
2059     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2060     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2061     _dec->telemetry=1;
2062     _dec->telemetry_bits=*(int *)_buf;
2063     return 0;
2064   }break;
2065 #endif
2066   default:return TH_EIMPL;
2067   }
2068 }
2069
2070 /*We're decoding an INTER frame, but have no initialized reference
2071    buffers (i.e., decoding did not start on a key frame).
2072   We initialize them to a solid gray here.*/
2073 static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
2074   th_info *info;
2075   size_t   yplane_sz;
2076   size_t   cplane_sz;
2077   int      yhstride;
2078   int      yheight;
2079   int      chstride;
2080   int      cheight;
2081   _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
2082   _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
2083   _dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
2084   memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
2085    sizeof(_dec->pp_frame_buf[0])*3);
2086   info=&_dec->state.info;
2087   yhstride=info->frame_width+2*OC_UMV_PADDING;
2088   yheight=info->frame_height+2*OC_UMV_PADDING;
2089   chstride=yhstride>>!(info->pixel_fmt&1);
2090   cheight=yheight>>!(info->pixel_fmt&2);
2091   yplane_sz=yhstride*(size_t)yheight;
2092   cplane_sz=chstride*(size_t)cheight;
2093   memset(_dec->state.ref_frame_data[0],0x80,yplane_sz+2*cplane_sz);
2094 }
2095
2096 int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
2097  ogg_int64_t *_granpos){
2098   int ret;
2099   if(_dec==NULL||_op==NULL)return TH_EFAULT;
2100   /*A completely empty packet indicates a dropped frame and is treated exactly
2101      like an inter frame with no coded blocks.*/
2102   if(_op->bytes==0){
2103     _dec->state.frame_type=OC_INTER_FRAME;
2104     _dec->state.ntotal_coded_fragis=0;
2105   }
2106   else{
2107     oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
2108     ret=oc_dec_frame_header_unpack(_dec);
2109     if(ret<0)return ret;
2110     if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
2111     else oc_dec_coded_flags_unpack(_dec);
2112   }
2113   /*If there have been no reference frames, and we need one, initialize one.*/
2114   if(_dec->state.frame_type!=OC_INTRA_FRAME&&
2115    (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
2116    _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
2117     oc_dec_init_dummy_frame(_dec);
2118   }
2119   /*If this was an inter frame with no coded blocks...*/
2120   if(_dec->state.ntotal_coded_fragis<=0){
2121     /*Just update the granule position and return.*/
2122     _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
2123      _dec->state.info.keyframe_granule_shift)
2124      +(_dec->state.curframe_num-_dec->state.keyframe_num);
2125     _dec->state.curframe_num++;
2126     if(_granpos!=NULL)*_granpos=_dec->state.granpos;
2127     return TH_DUPFRAME;
2128   }
2129   else{
2130     th_ycbcr_buffer stripe_buf;
2131     int             stripe_fragy;
2132     int             refi;
2133     int             pli;
2134     int             notstart;
2135     int             notdone;
2136     /*Select a free buffer to use for the reconstructed version of this frame.*/
2137     for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
2138      refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
2139     _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
2140 #if defined(HAVE_CAIRO)
2141     _dec->telemetry_frame_bytes=_op->bytes;
2142 #endif
2143     if(_dec->state.frame_type==OC_INTRA_FRAME){
2144       _dec->state.keyframe_num=_dec->state.curframe_num;
2145 #if defined(HAVE_CAIRO)
2146       _dec->telemetry_coding_bytes=
2147        _dec->telemetry_mode_bytes=
2148        _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
2149 #endif
2150     }
2151     else{
2152 #if defined(HAVE_CAIRO)
2153       _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
2154 #endif
2155       oc_dec_mb_modes_unpack(_dec);
2156 #if defined(HAVE_CAIRO)
2157       _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
2158 #endif
2159       oc_dec_mv_unpack_and_frag_modes_fill(_dec);
2160 #if defined(HAVE_CAIRO)
2161       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
2162 #endif
2163     }
2164     oc_dec_block_qis_unpack(_dec);
2165 #if defined(HAVE_CAIRO)
2166     _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
2167 #endif
2168     oc_dec_residual_tokens_unpack(_dec);
2169     /*Update granule position.
2170       This must be done before the striped decode callbacks so that the
2171        application knows what to do with the frame data.*/
2172     _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
2173      _dec->state.info.keyframe_granule_shift)
2174      +(_dec->state.curframe_num-_dec->state.keyframe_num);
2175     _dec->state.curframe_num++;
2176     if(_granpos!=NULL)*_granpos=_dec->state.granpos;
2177     /*All of the rest of the operations -- DC prediction reversal,
2178        reconstructing coded fragments, copying uncoded fragments, loop
2179        filtering, extending borders, and out-of-loop post-processing -- should
2180        be pipelined.
2181       I.e., DC prediction reversal, reconstruction, and uncoded fragment
2182        copying are done for one or two super block rows, then loop filtering is
2183        run as far as it can, then bordering copying, then post-processing.
2184       For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
2185        block rows, and one chroma.
2186       Otherwise, an MCU consists of one super block row from each plane.
2187       Inside each MCU, we perform all of the steps on one color plane before
2188        moving on to the next.
2189       After reconstruction, the additional filtering stages introduce a delay
2190        since they need some pixels from the next fragment row.
2191       Thus the actual number of decoded rows available is slightly smaller for
2192        the first MCU, and slightly larger for the last.
2193
2194       This entire process allows us to operate on the data while it is still in
2195        cache, resulting in big performance improvements.
2196       An application callback allows further application processing (blitting
2197        to video memory, color conversion, etc.) to also use the data while it's
2198        in cache.*/
2199     oc_dec_pipeline_init(_dec,&_dec->pipe);
2200     oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
2201     notstart=0;
2202     notdone=1;
2203     for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
2204       int avail_fragy0;
2205       int avail_fragy_end;
2206       avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
2207       notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
2208       for(pli=0;pli<3;pli++){
2209         oc_fragment_plane *fplane;
2210         int                frag_shift;
2211         int                pp_offset;
2212         int                sdelay;
2213         int                edelay;
2214         fplane=_dec->state.fplanes+pli;
2215         /*Compute the first and last fragment row of the current MCU for this
2216            plane.*/
2217         frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
2218         _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
2219         _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
2220          _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
2221         oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
2222         oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
2223         sdelay=edelay=0;
2224         if(_dec->pipe.loop_filter){
2225           sdelay+=notstart;
2226           edelay+=notdone;
2227           oc_state_loop_filter_frag_rows(&_dec->state,
2228            _dec->pipe.bounding_values,refi,pli,
2229            _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2230         }
2231         /*To fill the borders, we have an additional two pixel delay, since a
2232            fragment in the next row could filter its top edge, using two pixels
2233            from a fragment in this row.
2234           But there's no reason to delay a full fragment between the two.*/
2235         oc_state_borders_fill_rows(&_dec->state,refi,pli,
2236          (_dec->pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
2237          (_dec->pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
2238         /*Out-of-loop post-processing.*/
2239         pp_offset=3*(pli!=0);
2240         if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
2241           /*Perform de-blocking in one plane.*/
2242           sdelay+=notstart;
2243           edelay+=notdone;
2244           oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
2245            _dec->state.ref_frame_bufs[refi],pli,
2246            _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2247           if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
2248             /*Perform de-ringing in one plane.*/
2249             sdelay+=notstart;
2250             edelay+=notdone;
2251             oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
2252              _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2253           }
2254         }
2255         /*If no post-processing is done, we still need to delay a row for the
2256            loop filter, thanks to the strange filtering order VP3 chose.*/
2257         else if(_dec->pipe.loop_filter){
2258           sdelay+=notstart;
2259           edelay+=notdone;
2260         }
2261         /*Compute the intersection of the available rows in all planes.
2262           If chroma is sub-sampled, the effect of each of its delays is
2263            doubled, but luma might have more post-processing filters enabled
2264            than chroma, so we don't know up front which one is the limiting
2265            factor.*/
2266         avail_fragy0=OC_MINI(avail_fragy0,
2267          _dec->pipe.fragy0[pli]-sdelay<<frag_shift);
2268         avail_fragy_end=OC_MINI(avail_fragy_end,
2269          _dec->pipe.fragy_end[pli]-edelay<<frag_shift);
2270       }
2271       if(_dec->stripe_cb.stripe_decoded!=NULL){
2272         /*The callback might want to use the FPU, so let's make sure they can.
2273           We violate all kinds of ABI restrictions by not doing this until
2274            now, but none of them actually matter since we don't use floating
2275            point ourselves.*/
2276         oc_restore_fpu(&_dec->state);
2277         /*Make the callback, ensuring we flip the sense of the "start" and
2278            "end" of the available region upside down.*/
2279         (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
2280          _dec->state.fplanes[0].nvfrags-avail_fragy_end,
2281          _dec->state.fplanes[0].nvfrags-avail_fragy0);
2282       }
2283       notstart=1;
2284     }
2285     /*Finish filling in the reference frame borders.*/
2286     for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
2287     /*Update the reference frame indices.*/
2288     if(_dec->state.frame_type==OC_INTRA_FRAME){
2289       /*The new frame becomes both the previous and gold reference frames.*/
2290       _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
2291        _dec->state.ref_frame_idx[OC_FRAME_PREV]=
2292        _dec->state.ref_frame_idx[OC_FRAME_SELF];
2293     }
2294     else{
2295       /*Otherwise, just replace the previous reference frame.*/
2296       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
2297        _dec->state.ref_frame_idx[OC_FRAME_SELF];
2298     }
2299     /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
2300        gamma values, if nothing else).*/
2301     oc_restore_fpu(&_dec->state);
2302 #if defined(OC_DUMP_IMAGES)
2303     /*We only dump images if there were some coded blocks.*/
2304     oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
2305 #endif
2306     return 0;
2307   }
2308 }
2309
2310 int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
2311   if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
2312   oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
2313 #if defined(HAVE_CAIRO)
2314   /*If telemetry ioctls are active, we need to draw to the output buffer.
2315     Stuff the plane into cairo.*/
2316   if(_dec->telemetry){
2317     cairo_surface_t *cs;
2318     unsigned char   *data;
2319     unsigned char   *y_row;
2320     unsigned char   *u_row;
2321     unsigned char   *v_row;
2322     unsigned char   *rgb_row;
2323     int              cstride;
2324     int              w;
2325     int              h;
2326     int              x;
2327     int              y;
2328     int              hdec;
2329     int              vdec;
2330     w=_ycbcr[0].width;
2331     h=_ycbcr[0].height;
2332     hdec=!(_dec->state.info.pixel_fmt&1);
2333     vdec=!(_dec->state.info.pixel_fmt&2);
2334     /*Lazy data buffer init.
2335       We could try to re-use the post-processing buffer, which would save
2336        memory, but complicate the allocation logic there.
2337       I don't think anyone cares about memory usage when using telemetry; it is
2338        not meant for embedded devices.*/
2339     if(_dec->telemetry_frame_data==NULL){
2340       _dec->telemetry_frame_data=_ogg_malloc(
2341        (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
2342       if(_dec->telemetry_frame_data==NULL)return 0;
2343     }
2344     cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
2345     /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
2346     data=cairo_image_surface_get_data(cs);
2347     if(data==NULL){
2348       cairo_surface_destroy(cs);
2349       return 0;
2350     }
2351     cstride=cairo_image_surface_get_stride(cs);
2352     y_row=_ycbcr[0].data;
2353     u_row=_ycbcr[1].data;
2354     v_row=_ycbcr[2].data;
2355     rgb_row=data;
2356     for(y=0;y<h;y++){
2357       for(x=0;x<w;x++){
2358         int r;
2359         int g;
2360         int b;
2361         r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
2362         g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
2363          -2672387*v_row[x>>hdec]+447306710)/3287200;
2364         b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
2365         rgb_row[4*x+0]=OC_CLAMP255(b);
2366         rgb_row[4*x+1]=OC_CLAMP255(g);
2367         rgb_row[4*x+2]=OC_CLAMP255(r);
2368       }
2369       y_row+=_ycbcr[0].stride;
2370       u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
2371       v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
2372       rgb_row+=cstride;
2373     }
2374     /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
2375     {
2376       cairo_t           *c;
2377       const oc_fragment *frags;
2378       oc_mv             *frag_mvs;
2379       const signed char *mb_modes;
2380       oc_mb_map         *mb_maps;
2381       size_t             nmbs;
2382       size_t             mbi;
2383       int                row2;
2384       int                col2;
2385       int                qim[3]={0,0,0};
2386       if(_dec->state.nqis==2){
2387         int bqi;
2388         bqi=_dec->state.qis[0];
2389         if(_dec->state.qis[1]>bqi)qim[1]=1;
2390         if(_dec->state.qis[1]<bqi)qim[1]=-1;
2391       }
2392       if(_dec->state.nqis==3){
2393         int bqi;
2394         int cqi;
2395         int dqi;
2396         bqi=_dec->state.qis[0];
2397         cqi=_dec->state.qis[1];
2398         dqi=_dec->state.qis[2];
2399         if(cqi>bqi&&dqi>bqi){
2400           if(dqi>cqi){
2401             qim[1]=1;
2402             qim[2]=2;
2403           }
2404           else{
2405             qim[1]=2;
2406             qim[2]=1;
2407           }
2408         }
2409         else if(cqi<bqi&&dqi<bqi){
2410           if(dqi<cqi){
2411             qim[1]=-1;
2412             qim[2]=-2;
2413           }
2414           else{
2415             qim[1]=-2;
2416             qim[2]=-1;
2417           }
2418         }
2419         else{
2420           if(cqi<bqi)qim[1]=-1;
2421           else qim[1]=1;
2422           if(dqi<bqi)qim[2]=-1;
2423           else qim[2]=1;
2424         }
2425       }
2426       c=cairo_create(cs);
2427       frags=_dec->state.frags;
2428       frag_mvs=_dec->state.frag_mvs;
2429       mb_modes=_dec->state.mb_modes;
2430       mb_maps=_dec->state.mb_maps;
2431       nmbs=_dec->state.nmbs;
2432       row2=0;
2433       col2=0;
2434       for(mbi=0;mbi<nmbs;mbi++){
2435         float x;
2436         float y;
2437         int   bi;
2438         y=h-(row2+((col2+1>>1)&1))*16-16;
2439         x=(col2>>1)*16;
2440         cairo_set_line_width(c,1.);
2441         /*Keyframe (all intra) red box.*/
2442         if(_dec->state.frame_type==OC_INTRA_FRAME){
2443           if(_dec->telemetry_mbmode&0x02){
2444             cairo_set_source_rgba(c,1.,0,0,.5);
2445             cairo_rectangle(c,x+2.5,y+2.5,11,11);
2446             cairo_stroke_preserve(c);
2447             cairo_set_source_rgba(c,1.,0,0,.25);
2448             cairo_fill(c);
2449           }
2450         }
2451         else{
2452           ptrdiff_t fragi;
2453           int       frag_mvx;
2454           int       frag_mvy;
2455           for(bi=0;bi<4;bi++){
2456             fragi=mb_maps[mbi][0][bi];
2457             if(fragi>=0&&frags[fragi].coded){
2458               frag_mvx=OC_MV_X(frag_mvs[fragi]);
2459               frag_mvy=OC_MV_Y(frag_mvs[fragi]);
2460               break;
2461             }
2462           }
2463           if(bi<4){
2464             switch(mb_modes[mbi]){
2465               case OC_MODE_INTRA:{
2466                 if(_dec->telemetry_mbmode&0x02){
2467                   cairo_set_source_rgba(c,1.,0,0,.5);
2468                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2469                   cairo_stroke_preserve(c);
2470                   cairo_set_source_rgba(c,1.,0,0,.25);
2471                   cairo_fill(c);
2472                 }
2473               }break;
2474               case OC_MODE_INTER_NOMV:{
2475                 if(_dec->telemetry_mbmode&0x01){
2476                   cairo_set_source_rgba(c,0,0,1.,.5);
2477                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2478                   cairo_stroke_preserve(c);
2479                   cairo_set_source_rgba(c,0,0,1.,.25);
2480                   cairo_fill(c);
2481                 }
2482               }break;
2483               case OC_MODE_INTER_MV:{
2484                 if(_dec->telemetry_mbmode&0x04){
2485                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2486                   cairo_set_source_rgba(c,0,1.,0,.5);
2487                   cairo_stroke(c);
2488                 }
2489                 if(_dec->telemetry_mv&0x04){
2490                   cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2491                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2492                   cairo_set_line_width(c,3.);
2493                   cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2494                   cairo_stroke_preserve(c);
2495                   cairo_set_line_width(c,2.);
2496                   cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2497                   cairo_stroke_preserve(c);
2498                   cairo_set_line_width(c,1.);
2499                   cairo_line_to(c,x+8,y+8);
2500                   cairo_stroke(c);
2501                 }
2502               }break;
2503               case OC_MODE_INTER_MV_LAST:{
2504                 if(_dec->telemetry_mbmode&0x08){
2505                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2506                   cairo_set_source_rgba(c,0,1.,0,.5);
2507                   cairo_move_to(c,x+13.5,y+2.5);
2508                   cairo_line_to(c,x+2.5,y+8);
2509                   cairo_line_to(c,x+13.5,y+13.5);
2510                   cairo_stroke(c);
2511                 }
2512                 if(_dec->telemetry_mv&0x08){
2513                   cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2514                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2515                   cairo_set_line_width(c,3.);
2516                   cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2517                   cairo_stroke_preserve(c);
2518                   cairo_set_line_width(c,2.);
2519                   cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2520                   cairo_stroke_preserve(c);
2521                   cairo_set_line_width(c,1.);
2522                   cairo_line_to(c,x+8,y+8);
2523                   cairo_stroke(c);
2524                 }
2525               }break;
2526               case OC_MODE_INTER_MV_LAST2:{
2527                 if(_dec->telemetry_mbmode&0x10){
2528                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2529                   cairo_set_source_rgba(c,0,1.,0,.5);
2530                   cairo_move_to(c,x+8,y+2.5);
2531                   cairo_line_to(c,x+2.5,y+8);
2532                   cairo_line_to(c,x+8,y+13.5);
2533                   cairo_move_to(c,x+13.5,y+2.5);
2534                   cairo_line_to(c,x+8,y+8);
2535                   cairo_line_to(c,x+13.5,y+13.5);
2536                   cairo_stroke(c);
2537                 }
2538                 if(_dec->telemetry_mv&0x10){
2539                   cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2540                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2541                   cairo_set_line_width(c,3.);
2542                   cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2543                   cairo_stroke_preserve(c);
2544                   cairo_set_line_width(c,2.);
2545                   cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2546                   cairo_stroke_preserve(c);
2547                   cairo_set_line_width(c,1.);
2548                   cairo_line_to(c,x+8,y+8);
2549                   cairo_stroke(c);
2550                 }
2551               }break;
2552               case OC_MODE_GOLDEN_NOMV:{
2553                 if(_dec->telemetry_mbmode&0x20){
2554                   cairo_set_source_rgba(c,1.,1.,0,.5);
2555                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2556                   cairo_stroke_preserve(c);
2557                   cairo_set_source_rgba(c,1.,1.,0,.25);
2558                   cairo_fill(c);
2559                 }
2560               }break;
2561               case OC_MODE_GOLDEN_MV:{
2562                 if(_dec->telemetry_mbmode&0x40){
2563                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2564                   cairo_set_source_rgba(c,1.,1.,0,.5);
2565                   cairo_stroke(c);
2566                 }
2567                 if(_dec->telemetry_mv&0x40){
2568                   cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2569                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2570                   cairo_set_line_width(c,3.);
2571                   cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2572                   cairo_stroke_preserve(c);
2573                   cairo_set_line_width(c,2.);
2574                   cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2575                   cairo_stroke_preserve(c);
2576                   cairo_set_line_width(c,1.);
2577                   cairo_line_to(c,x+8,y+8);
2578                   cairo_stroke(c);
2579                 }
2580               }break;
2581               case OC_MODE_INTER_MV_FOUR:{
2582                 if(_dec->telemetry_mbmode&0x80){
2583                   cairo_rectangle(c,x+2.5,y+2.5,4,4);
2584                   cairo_rectangle(c,x+9.5,y+2.5,4,4);
2585                   cairo_rectangle(c,x+2.5,y+9.5,4,4);
2586                   cairo_rectangle(c,x+9.5,y+9.5,4,4);
2587                   cairo_set_source_rgba(c,0,1.,0,.5);
2588                   cairo_stroke(c);
2589                 }
2590                 /*4mv is odd, coded in raster order.*/
2591                 fragi=mb_maps[mbi][0][0];
2592                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2593                   frag_mvx=OC_MV_X(frag_mvs[fragi]);
2594                   frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2595                   cairo_move_to(c,x+4+frag_mvx,y+12-frag_mvy);
2596                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2597                   cairo_set_line_width(c,3.);
2598                   cairo_line_to(c,x+4+frag_mvx*.66,y+12-frag_mvy*.66);
2599                   cairo_stroke_preserve(c);
2600                   cairo_set_line_width(c,2.);
2601                   cairo_line_to(c,x+4+frag_mvx*.33,y+12-frag_mvy*.33);
2602                   cairo_stroke_preserve(c);
2603                   cairo_set_line_width(c,1.);
2604                   cairo_line_to(c,x+4,y+12);
2605                   cairo_stroke(c);
2606                 }
2607                 fragi=mb_maps[mbi][0][1];
2608                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2609                   frag_mvx=OC_MV_X(frag_mvs[fragi]);
2610                   frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2611                   cairo_move_to(c,x+12+frag_mvx,y+12-frag_mvy);
2612                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2613                   cairo_set_line_width(c,3.);
2614                   cairo_line_to(c,x+12+frag_mvx*.66,y+12-frag_mvy*.66);
2615                   cairo_stroke_preserve(c);
2616                   cairo_set_line_width(c,2.);
2617                   cairo_line_to(c,x+12+frag_mvx*.33,y+12-frag_mvy*.33);
2618                   cairo_stroke_preserve(c);
2619                   cairo_set_line_width(c,1.);
2620                   cairo_line_to(c,x+12,y+12);
2621                   cairo_stroke(c);
2622                 }
2623                 fragi=mb_maps[mbi][0][2];
2624                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2625                   frag_mvx=OC_MV_X(frag_mvs[fragi]);
2626                   frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2627                   cairo_move_to(c,x+4+frag_mvx,y+4-frag_mvy);
2628                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2629                   cairo_set_line_width(c,3.);
2630                   cairo_line_to(c,x+4+frag_mvx*.66,y+4-frag_mvy*.66);
2631                   cairo_stroke_preserve(c);
2632                   cairo_set_line_width(c,2.);
2633                   cairo_line_to(c,x+4+frag_mvx*.33,y+4-frag_mvy*.33);
2634                   cairo_stroke_preserve(c);
2635                   cairo_set_line_width(c,1.);
2636                   cairo_line_to(c,x+4,y+4);
2637                   cairo_stroke(c);
2638                 }
2639                 fragi=mb_maps[mbi][0][3];
2640                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2641                   frag_mvx=OC_MV_X(frag_mvs[fragi]);
2642                   frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2643                   cairo_move_to(c,x+12+frag_mvx,y+4-frag_mvy);
2644                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2645                   cairo_set_line_width(c,3.);
2646                   cairo_line_to(c,x+12+frag_mvx*.66,y+4-frag_mvy*.66);
2647                   cairo_stroke_preserve(c);
2648                   cairo_set_line_width(c,2.);
2649                   cairo_line_to(c,x+12+frag_mvx*.33,y+4-frag_mvy*.33);
2650                   cairo_stroke_preserve(c);
2651                   cairo_set_line_width(c,1.);
2652                   cairo_line_to(c,x+12,y+4);
2653                   cairo_stroke(c);
2654                 }
2655               }break;
2656             }
2657           }
2658         }
2659         /*qii illustration.*/
2660         if(_dec->telemetry_qi&0x2){
2661           cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
2662           for(bi=0;bi<4;bi++){
2663             ptrdiff_t fragi;
2664             int       qiv;
2665             int       xp;
2666             int       yp;
2667             xp=x+(bi&1)*8;
2668             yp=y+8-(bi&2)*4;
2669             fragi=mb_maps[mbi][0][bi];
2670             if(fragi>=0&&frags[fragi].coded){
2671               qiv=qim[frags[fragi].qii];
2672               cairo_set_line_width(c,3.);
2673               cairo_set_source_rgba(c,0.,0.,0.,.5);
2674               switch(qiv){
2675                 /*Double plus:*/
2676                 case 2:{
2677                   if((bi&1)^((bi&2)>>1)){
2678                     cairo_move_to(c,xp+2.5,yp+1.5);
2679                     cairo_line_to(c,xp+2.5,yp+3.5);
2680                     cairo_move_to(c,xp+1.5,yp+2.5);
2681                     cairo_line_to(c,xp+3.5,yp+2.5);
2682                     cairo_move_to(c,xp+5.5,yp+4.5);
2683                     cairo_line_to(c,xp+5.5,yp+6.5);
2684                     cairo_move_to(c,xp+4.5,yp+5.5);
2685                     cairo_line_to(c,xp+6.5,yp+5.5);
2686                     cairo_stroke_preserve(c);
2687                     cairo_set_source_rgba(c,0.,1.,1.,1.);
2688                   }
2689                   else{
2690                     cairo_move_to(c,xp+5.5,yp+1.5);
2691                     cairo_line_to(c,xp+5.5,yp+3.5);
2692                     cairo_move_to(c,xp+4.5,yp+2.5);
2693                     cairo_line_to(c,xp+6.5,yp+2.5);
2694                     cairo_move_to(c,xp+2.5,yp+4.5);
2695                     cairo_line_to(c,xp+2.5,yp+6.5);
2696                     cairo_move_to(c,xp+1.5,yp+5.5);
2697                     cairo_line_to(c,xp+3.5,yp+5.5);
2698                     cairo_stroke_preserve(c);
2699                     cairo_set_source_rgba(c,0.,1.,1.,1.);
2700                   }
2701                 }break;
2702                 /*Double minus:*/
2703                 case -2:{
2704                   cairo_move_to(c,xp+2.5,yp+2.5);
2705                   cairo_line_to(c,xp+5.5,yp+2.5);
2706                   cairo_move_to(c,xp+2.5,yp+5.5);
2707                   cairo_line_to(c,xp+5.5,yp+5.5);
2708                   cairo_stroke_preserve(c);
2709                   cairo_set_source_rgba(c,1.,1.,1.,1.);
2710                 }break;
2711                 /*Plus:*/
2712                 case 1:{
2713                   if(bi&2==0)yp-=2;
2714                   if(bi&1==0)xp-=2;
2715                   cairo_move_to(c,xp+4.5,yp+2.5);
2716                   cairo_line_to(c,xp+4.5,yp+6.5);
2717                   cairo_move_to(c,xp+2.5,yp+4.5);
2718                   cairo_line_to(c,xp+6.5,yp+4.5);
2719                   cairo_stroke_preserve(c);
2720                   cairo_set_source_rgba(c,.1,1.,.3,1.);
2721                   break;
2722                 }
2723                 /*Fall through.*/
2724                 /*Minus:*/
2725                 case -1:{
2726                   cairo_move_to(c,xp+2.5,yp+4.5);
2727                   cairo_line_to(c,xp+6.5,yp+4.5);
2728                   cairo_stroke_preserve(c);
2729                   cairo_set_source_rgba(c,1.,.3,.1,1.);
2730                 }break;
2731                 default:continue;
2732               }
2733               cairo_set_line_width(c,1.);
2734               cairo_stroke(c);
2735             }
2736           }
2737         }
2738         col2++;
2739         if((col2>>1)>=_dec->state.nhmbs){
2740           col2=0;
2741           row2+=2;
2742         }
2743       }
2744       /*Bit usage indicator[s]:*/
2745       if(_dec->telemetry_bits){
2746         int widths[6];
2747         int fpsn;
2748         int fpsd;
2749         int mult;
2750         int fullw;
2751         int padw;
2752         int i;
2753         fpsn=_dec->state.info.fps_numerator;
2754         fpsd=_dec->state.info.fps_denominator;
2755         mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
2756         fullw=250.f*h*fpsd*mult/fpsn;
2757         padw=w-24;
2758         /*Header and coded block bits.*/
2759         if(_dec->telemetry_frame_bytes<0||
2760          _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
2761           _dec->telemetry_frame_bytes=0;
2762         }
2763         if(_dec->telemetry_coding_bytes<0||
2764          _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
2765           _dec->telemetry_coding_bytes=0;
2766         }
2767         if(_dec->telemetry_mode_bytes<0||
2768          _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
2769           _dec->telemetry_mode_bytes=0;
2770         }
2771         if(_dec->telemetry_mv_bytes<0||
2772          _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
2773           _dec->telemetry_mv_bytes=0;
2774         }
2775         if(_dec->telemetry_qi_bytes<0||
2776          _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
2777           _dec->telemetry_qi_bytes=0;
2778         }
2779         if(_dec->telemetry_dc_bytes<0||
2780          _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
2781           _dec->telemetry_dc_bytes=0;
2782         }
2783         widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
2784         widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
2785         widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
2786         widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
2787         widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
2788         widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
2789         for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
2790         cairo_set_source_rgba(c,.0,.0,.0,.6);
2791         cairo_rectangle(c,10,h-33,widths[0]+1,5);
2792         cairo_rectangle(c,10,h-29,widths[1]+1,5);
2793         cairo_rectangle(c,10,h-25,widths[2]+1,5);
2794         cairo_rectangle(c,10,h-21,widths[3]+1,5);
2795         cairo_rectangle(c,10,h-17,widths[4]+1,5);
2796         cairo_rectangle(c,10,h-13,widths[5]+1,5);
2797         cairo_fill(c);
2798         cairo_set_source_rgb(c,1,0,0);
2799         cairo_rectangle(c,10.5,h-32.5,widths[0],4);
2800         cairo_fill(c);
2801         cairo_set_source_rgb(c,0,1,0);
2802         cairo_rectangle(c,10.5,h-28.5,widths[1],4);
2803         cairo_fill(c);
2804         cairo_set_source_rgb(c,0,0,1);
2805         cairo_rectangle(c,10.5,h-24.5,widths[2],4);
2806         cairo_fill(c);
2807         cairo_set_source_rgb(c,.6,.4,.0);
2808         cairo_rectangle(c,10.5,h-20.5,widths[3],4);
2809         cairo_fill(c);
2810         cairo_set_source_rgb(c,.3,.3,.3);
2811         cairo_rectangle(c,10.5,h-16.5,widths[4],4);
2812         cairo_fill(c);
2813         cairo_set_source_rgb(c,.5,.5,.8);
2814         cairo_rectangle(c,10.5,h-12.5,widths[5],4);
2815         cairo_fill(c);
2816       }
2817       /*Master qi indicator[s]:*/
2818       if(_dec->telemetry_qi&0x1){
2819         cairo_text_extents_t extents;
2820         char                 buffer[10];
2821         int                  p;
2822         int                  y;
2823         p=0;
2824         y=h-7.5;
2825         if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
2826         buffer[p++]=48+_dec->state.qis[0]%10;
2827         if(_dec->state.nqis>=2){
2828           buffer[p++]=' ';
2829           if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
2830           buffer[p++]=48+_dec->state.qis[1]%10;
2831         }
2832         if(_dec->state.nqis==3){
2833           buffer[p++]=' ';
2834           if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
2835           buffer[p++]=48+_dec->state.qis[2]%10;
2836         }
2837         buffer[p++]='\0';
2838         cairo_select_font_face(c,"sans",
2839          CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
2840         cairo_set_font_size(c,18);
2841         cairo_text_extents(c,buffer,&extents);
2842         cairo_set_source_rgb(c,1,1,1);
2843         cairo_move_to(c,w-extents.x_advance-10,y);
2844         cairo_show_text(c,buffer);
2845         cairo_set_source_rgb(c,0,0,0);
2846         cairo_move_to(c,w-extents.x_advance-10,y);
2847         cairo_text_path(c,buffer);
2848         cairo_set_line_width(c,.8);
2849         cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
2850         cairo_stroke(c);
2851       }
2852       cairo_destroy(c);
2853     }
2854     /*Out of the Cairo plane into the telemetry YUV buffer.*/
2855     _ycbcr[0].data=_dec->telemetry_frame_data;
2856     _ycbcr[0].stride=_ycbcr[0].width;
2857     _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
2858     _ycbcr[1].stride=_ycbcr[1].width;
2859     _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
2860     _ycbcr[2].stride=_ycbcr[2].width;
2861     y_row=_ycbcr[0].data;
2862     u_row=_ycbcr[1].data;
2863     v_row=_ycbcr[2].data;
2864     rgb_row=data;
2865     /*This is one of the few places it's worth handling chroma on a
2866        case-by-case basis.*/
2867     switch(_dec->state.info.pixel_fmt){
2868       case TH_PF_420:{
2869         for(y=0;y<h;y+=2){
2870           unsigned char *y_row2;
2871           unsigned char *rgb_row2;
2872           y_row2=y_row+_ycbcr[0].stride;
2873           rgb_row2=rgb_row+cstride;
2874           for(x=0;x<w;x+=2){
2875             int y;
2876             int u;
2877             int v;
2878             y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2879              +24966*rgb_row[4*x+0]+4207500)/255000;
2880             y_row[x]=OC_CLAMP255(y);
2881             y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2882              +24966*rgb_row[4*x+4]+4207500)/255000;
2883             y_row[x+1]=OC_CLAMP255(y);
2884             y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
2885              +24966*rgb_row2[4*x+0]+4207500)/255000;
2886             y_row2[x]=OC_CLAMP255(y);
2887             y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
2888              +24966*rgb_row2[4*x+4]+4207500)/255000;
2889             y_row2[x+1]=OC_CLAMP255(y);
2890             u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
2891              +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2892              -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
2893              +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2894              +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
2895              +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
2896             v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
2897              +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2898              -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
2899               +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2900              -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
2901               +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
2902             u_row[x>>1]=OC_CLAMP255(u);
2903             v_row[x>>1]=OC_CLAMP255(v);
2904           }
2905           y_row+=_ycbcr[0].stride<<1;
2906           u_row+=_ycbcr[1].stride;
2907           v_row+=_ycbcr[2].stride;
2908           rgb_row+=cstride<<1;
2909         }
2910       }break;
2911       case TH_PF_422:{
2912         for(y=0;y<h;y++){
2913           for(x=0;x<w;x+=2){
2914             int y;
2915             int u;
2916             int v;
2917             y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2918              +24966*rgb_row[4*x+0]+4207500)/255000;
2919             y_row[x]=OC_CLAMP255(y);
2920             y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2921              +24966*rgb_row[4*x+4]+4207500)/255000;
2922             y_row[x+1]=OC_CLAMP255(y);
2923             u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
2924              -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
2925              +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
2926             v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
2927              -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
2928              -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
2929             u_row[x>>1]=OC_CLAMP255(u);
2930             v_row[x>>1]=OC_CLAMP255(v);
2931           }
2932           y_row+=_ycbcr[0].stride;
2933           u_row+=_ycbcr[1].stride;
2934           v_row+=_ycbcr[2].stride;
2935           rgb_row+=cstride;
2936         }
2937       }break;
2938       /*case TH_PF_444:*/
2939       default:{
2940         for(y=0;y<h;y++){
2941           for(x=0;x<w;x++){
2942             int y;
2943             int u;
2944             int v;
2945             y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2946              +24966*rgb_row[4*x+0]+4207500)/255000;
2947             u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
2948              +99232*rgb_row[4*x+0]+29032005)/225930;
2949             v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
2950              -25536*rgb_row[4*x+0]+45940035)/357510;
2951             y_row[x]=OC_CLAMP255(y);
2952             u_row[x]=OC_CLAMP255(u);
2953             v_row[x]=OC_CLAMP255(v);
2954           }
2955           y_row+=_ycbcr[0].stride;
2956           u_row+=_ycbcr[1].stride;
2957           v_row+=_ycbcr[2].stride;
2958           rgb_row+=cstride;
2959         }
2960       }break;
2961     }
2962     /*Finished.
2963       Destroy the surface.*/
2964     cairo_surface_destroy(cs);
2965   }
2966 #endif
2967   return 0;
2968 }