Add PROC/ENDP markings to the ARM asm (currently ignored by the GNU toolchain).
[theora.git] / lib / decode.c
1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10  *                                                                  *
11  ********************************************************************
12
13   function:
14     last mod: $Id$
15
16  ********************************************************************/
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ogg/ogg.h>
21 #include "decint.h"
22 #if defined(OC_DUMP_IMAGES)
23 # include <stdio.h>
24 # include "png.h"
25 #endif
26 #if defined(HAVE_CAIRO)
27 # include <cairo.h>
28 #endif
29
30
/*Post-processing levels.
  The values are consecutive integers from 0 up to OC_PP_LEVEL_MAX.
  oc_dec_init() starts every decoder at OC_PP_LEVEL_DISABLED.*/
31 /*No post-processing.*/
32 #define OC_PP_LEVEL_DISABLED  (0)
33 /*Keep track of DC qi for each block only.*/
34 #define OC_PP_LEVEL_TRACKDCQI (1)
35 /*Deblock the luma plane.*/
36 #define OC_PP_LEVEL_DEBLOCKY  (2)
37 /*Dering the luma plane.*/
38 #define OC_PP_LEVEL_DERINGY   (3)
39 /*Stronger luma plane deringing.*/
40 #define OC_PP_LEVEL_SDERINGY  (4)
41 /*Deblock the chroma planes.*/
42 #define OC_PP_LEVEL_DEBLOCKC  (5)
43 /*Dering the chroma planes.*/
44 #define OC_PP_LEVEL_DERINGC   (6)
45 /*Stronger chroma plane deringing.*/
46 #define OC_PP_LEVEL_SDERINGC  (7)
47 /*Maximum valid post-processing level.
    This has the same value as OC_PP_LEVEL_SDERINGC, the highest level defined
     above.*/
48 #define OC_PP_LEVEL_MAX       (7)
49
50
51
52 /*The mode alphabets for the various mode coding schemes.
53   Scheme 0 uses a custom alphabet, which is not stored in this table.*/
/*Row i holds the alphabet for mode scheme i+1: oc_dec_mb_modes_unpack()
   indexes this table with mode_scheme-1.
  Each row maps a decoded mode index (0...OC_NMODES-1) to a macro block mode.
  The last row is also used when building the custom scheme 0 alphabet: its
   entries are assigned, in order, to the indices read from the packet.*/
54 static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
55   /*Last MV dominates */
56   {
57     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
58     OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
59     OC_MODE_INTER_MV_FOUR
60   },
61   {
62     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
63     OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
64     OC_MODE_INTER_MV_FOUR
65   },
66   {
67     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
68     OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
69     OC_MODE_INTER_MV_FOUR
70   },
71   {
72     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
73     OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
74     OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
75   },
76   /*No MV dominates.*/
77   {
78     OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
79     OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
80     OC_MODE_INTER_MV_FOUR
81   },
82   {
83     OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
84     OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
85     OC_MODE_INTER_MV_FOUR
86   },
87   /*Default ordering.*/
88   {
89     OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
90     OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
91     OC_MODE_INTER_MV_FOUR
92   }
93 };
94
95
96 /*The original DCT tokens are extended and reordered during the construction of
97    the Huffman tables.
98   The extension means more bits can be read with fewer calls to the bitpacker
99    during the Huffman decoding process (at the cost of larger Huffman tables),
100    and fewer tokens require additional extra bits (reducing the average storage
101    per decoded token).
102   The revised ordering reveals essential information in the token value
103    itself; specifically, whether or not there are additional extra bits to read
104    and the parameter to which those extra bits are applied.
105   The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
106   The extra bits are added into code word at the bit position inferred from the
107    token value, giving the final code word from which all required parameters
108    are derived.
109   The number of EOBs and the leading zero run length can be extracted directly.
110   The coefficient magnitude is optionally negated before extraction, according
111    to a 'flip' bit.*/
112
113 /*The number of additional extra bits that are decoded with each of the
114    internal DCT tokens.*/
/*Indexed by internal token value.
  The 15 entries line up with the first 15 entries of OC_DCT_CODE_WORD, i.e.,
   the tokens documented there as needing additional extra bits.*/
115 static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
116   12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
117 };
118
119 /*Whether or not an internal token needs any additional extra bits.*/
/*True exactly when token is a valid index into
   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS (tokens 0...14).
  NOTE: The argument is not parenthesized in the expansion, so only pass a
   simple expression.*/
120 #define OC_DCT_TOKEN_NEEDS_MORE(token) \
121  (token<(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
122   sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
123
124 /*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/
125 #define OC_DCT_TOKEN_FAT_EOB (0)
126
127 /*The number of EOBs to use for an end-of-frame token.
128   Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
129    is not yet available everywhere; this should be equivalent.*/
/*The expression is the all-ones size_t value shifted right by one bit.*/
130 #define OC_DCT_EOB_FINISH (~(size_t)0>>1)
131
132 /*The location of the (6) run length bits in the code word.
133   These are placed at index 0 and given 8 bits (even though 6 would suffice)
134    because it may be faster to extract the lower byte on some platforms.*/
135 #define OC_DCT_CW_RLEN_SHIFT (0)
136 /*The location of the (12) EOB bits in the code word.*/
137 #define OC_DCT_CW_EOB_SHIFT  (8)
138 /*The location of the (1) flip bit in the code word.
139   This must be right under the magnitude bits.*/
140 #define OC_DCT_CW_FLIP_BIT   (20)
141 /*The location of the (11) token magnitude bits in the code word.
142   These must be last, and rely on a sign-extending right shift.*/
143 #define OC_DCT_CW_MAG_SHIFT  (21)
144
145 /*Pack the given fields into a code word.*/
/*Subtraction binds tighter than <<, so the last term is
   ((_mag)-(_flip))<<OC_DCT_CW_MAG_SHIFT: the magnitude field stores _mag-_flip.
  NOTE(review): Negative magnitudes are left-shifted here, which ISO C leaves
   undefined for signed operands; this relies on the compiler producing the
   usual two's complement result.*/
146 #define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
147  ((_eobs)<<OC_DCT_CW_EOB_SHIFT| \
148  (_rlen)<<OC_DCT_CW_RLEN_SHIFT| \
149  (_flip)<<OC_DCT_CW_FLIP_BIT| \
150  (_mag)-(_flip)<<OC_DCT_CW_MAG_SHIFT)
151
152 /*A special code word value that signals the end of the frame (a long EOB run
153    of zero).*/
154 #define OC_DCT_CW_FINISH (0)
155
156 /*The position at which to insert the extra bits in the code word.
157   We use this formulation because Intel has no useful cmov.
158   A real architecture would probably do better with two of those.
159   This translates to 11 instructions(!), and is _still_ faster than either a
160    table lookup (just barely) or the naive double-ternary implementation (which
161    gcc translates to a jump and a cmov).
162   This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
163    you want to make one of the other shifts zero.*/
/*With the shift values defined above, this evaluates to:
    _token<2:      OC_DCT_CW_EOB_SHIFT (8)
    2<=_token<12:  OC_DCT_CW_MAG_SHIFT (21)
    _token>=12:    OC_DCT_CW_RLEN_SHIFT (0)
  The first term is (EOB_SHIFT-MAG_SHIFT)&mask because subtraction binds
   tighter than &; each mask is all ones when its comparison is true and zero
   otherwise.*/
164 #define OC_DCT_TOKEN_EB_POS(_token) \
165  ((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT&-((_token)<2)) \
166  +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
167
168 /*The code words for each internal token.
169   See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
170    order.*/
/*Indexed by internal token value; each entry is built with OC_DCT_CW_PACK
   (EOB count, run length, magnitude, flip).
  The first 15 entries are the tokens that need additional extra bits and line
   up one-to-one with OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
  Their grouping matches OC_DCT_TOKEN_EB_POS: entries 0-1 extend the EOB count,
   entries 2-11 extend the magnitude, and entries 12-14 extend the run
   length.*/
171 static const ogg_int32_t OC_DCT_CODE_WORD[92]={
172   /*These tokens require additional extra bits for the EOB count.*/
173   /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
174   OC_DCT_CW_FINISH,
175   /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
176   OC_DCT_CW_PACK(16, 0,  0,0),
177   /*These tokens require additional extra bits for the magnitude.*/
178   /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
179   OC_DCT_CW_PACK( 0, 0, 13,0),
180   OC_DCT_CW_PACK( 0, 0, 13,1),
181   /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
182   OC_DCT_CW_PACK( 0, 0, 21,0),
183   OC_DCT_CW_PACK( 0, 0, 21,1),
184   /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
185   OC_DCT_CW_PACK( 0, 0, 37,0),
186   OC_DCT_CW_PACK( 0, 0, 37,1),
187   /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
188   OC_DCT_CW_PACK( 0, 0, 69,0),
189   OC_DCT_CW_PACK( 0, 0,325,0),
190   OC_DCT_CW_PACK( 0, 0, 69,1),
191   OC_DCT_CW_PACK( 0, 0,325,1),
192   /*These tokens require additional extra bits for the run length.*/
193   /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
194   OC_DCT_CW_PACK( 0,10, +1,0),
195   OC_DCT_CW_PACK( 0,10, -1,0),
196   /*OC_DCT_ZRL_TOKEN (6 extra bits)
197     Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
198   OC_DCT_CW_PACK( 0, 0,  0,1),
199   /*The remaining tokens require no additional extra bits.*/
200   /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
201   OC_DCT_CW_PACK( 1, 0,  0,0),
202   /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
203   OC_DCT_CW_PACK( 2, 0,  0,0),
204   /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
205   OC_DCT_CW_PACK( 3, 0,  0,0),
206   /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
207   OC_DCT_CW_PACK( 0, 1, +1,0),
208   OC_DCT_CW_PACK( 0, 1, -1,0),
209   OC_DCT_CW_PACK( 0, 2, +1,0),
210   OC_DCT_CW_PACK( 0, 2, -1,0),
211   OC_DCT_CW_PACK( 0, 3, +1,0),
212   OC_DCT_CW_PACK( 0, 3, -1,0),
213   OC_DCT_CW_PACK( 0, 4, +1,0),
214   OC_DCT_CW_PACK( 0, 4, -1,0),
215   OC_DCT_CW_PACK( 0, 5, +1,0),
216   OC_DCT_CW_PACK( 0, 5, -1,0),
217   /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
218   OC_DCT_CW_PACK( 0, 1, +2,0),
219   OC_DCT_CW_PACK( 0, 1, +3,0),
220   OC_DCT_CW_PACK( 0, 1, -2,0),
221   OC_DCT_CW_PACK( 0, 1, -3,0),
222   /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
223   OC_DCT_CW_PACK( 0, 6, +1,0),
224   OC_DCT_CW_PACK( 0, 7, +1,0),
225   OC_DCT_CW_PACK( 0, 8, +1,0),
226   OC_DCT_CW_PACK( 0, 9, +1,0),
227   OC_DCT_CW_PACK( 0, 6, -1,0),
228   OC_DCT_CW_PACK( 0, 7, -1,0),
229   OC_DCT_CW_PACK( 0, 8, -1,0),
230   OC_DCT_CW_PACK( 0, 9, -1,0),
231   /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
232   OC_DCT_CW_PACK( 0, 2, +2,0),
233   OC_DCT_CW_PACK( 0, 3, +2,0),
234   OC_DCT_CW_PACK( 0, 2, +3,0),
235   OC_DCT_CW_PACK( 0, 3, +3,0),
236   OC_DCT_CW_PACK( 0, 2, -2,0),
237   OC_DCT_CW_PACK( 0, 3, -2,0),
238   OC_DCT_CW_PACK( 0, 2, -3,0),
239   OC_DCT_CW_PACK( 0, 3, -3,0),
240   /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
241     Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
242   OC_DCT_CW_PACK( 0, 0,  0,1),
243   OC_DCT_CW_PACK( 0, 1,  0,0),
244   OC_DCT_CW_PACK( 0, 2,  0,0),
245   OC_DCT_CW_PACK( 0, 3,  0,0),
246   OC_DCT_CW_PACK( 0, 4,  0,0),
247   OC_DCT_CW_PACK( 0, 5,  0,0),
248   OC_DCT_CW_PACK( 0, 6,  0,0),
249   OC_DCT_CW_PACK( 0, 7,  0,0),
250   /*OC_ONE_TOKEN (0 extra bits)*/
251   OC_DCT_CW_PACK( 0, 0, +1,0),
252   /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
253   OC_DCT_CW_PACK( 0, 0, -1,0),
254   /*OC_TWO_TOKEN (0 extra bits)*/
255   OC_DCT_CW_PACK( 0, 0, +2,0),
256   /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
257   OC_DCT_CW_PACK( 0, 0, -2,0),
258   /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
259   OC_DCT_CW_PACK( 0, 0, +3,0),
260   OC_DCT_CW_PACK( 0, 0, -3,0),
261   OC_DCT_CW_PACK( 0, 0, +4,0),
262   OC_DCT_CW_PACK( 0, 0, -4,0),
263   OC_DCT_CW_PACK( 0, 0, +5,0),
264   OC_DCT_CW_PACK( 0, 0, -5,0),
265   OC_DCT_CW_PACK( 0, 0, +6,0),
266   OC_DCT_CW_PACK( 0, 0, -6,0),
267   /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
268   OC_DCT_CW_PACK( 0, 0, +7,0),
269   OC_DCT_CW_PACK( 0, 0, +8,0),
270   OC_DCT_CW_PACK( 0, 0, -7,0),
271   OC_DCT_CW_PACK( 0, 0, -8,0),
272   /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
273   OC_DCT_CW_PACK( 0, 0, +9,0),
274   OC_DCT_CW_PACK( 0, 0,+10,0),
275   OC_DCT_CW_PACK( 0, 0,+11,0),
276   OC_DCT_CW_PACK( 0, 0,+12,0),
277   OC_DCT_CW_PACK( 0, 0, -9,0),
278   OC_DCT_CW_PACK( 0, 0,-10,0),
279   OC_DCT_CW_PACK( 0, 0,-11,0),
280   OC_DCT_CW_PACK( 0, 0,-12,0),
281   /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
282   OC_DCT_CW_PACK( 8, 0,  0,0),
283   OC_DCT_CW_PACK( 9, 0,  0,0),
284   OC_DCT_CW_PACK(10, 0,  0,0),
285   OC_DCT_CW_PACK(11, 0,  0,0),
286   OC_DCT_CW_PACK(12, 0,  0,0),
287   OC_DCT_CW_PACK(13, 0,  0,0),
288   OC_DCT_CW_PACK(14, 0,  0,0),
289   OC_DCT_CW_PACK(15, 0,  0,0),
290   /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
291   OC_DCT_CW_PACK( 4, 0,  0,0),
292   OC_DCT_CW_PACK( 5, 0,  0,0),
293   OC_DCT_CW_PACK( 6, 0,  0,0),
294   OC_DCT_CW_PACK( 7, 0,  0,0),
295 };
296
297
298
299 static int oc_sb_run_unpack(oc_pack_buf *_opb){
300   /*Coding scheme:
301        Codeword            Run Length
302      0                       1
303      10x                     2-3
304      110x                    4-5
305      1110xx                  6-9
306      11110xxx                10-17
307      111110xxxx              18-33
308      111111xxxxxxxxxxxx      34-4129*/
309   static const ogg_int16_t OC_SB_RUN_TREE[22]={
310     4,
311      -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
312      -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
313      -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
314      -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
315       2,
316        -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),-(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
317   };
318   int ret;
319   ret=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
320   if(ret>=0x10){
321     int offs;
322     offs=ret&0x1F;
323     ret=6+offs+(int)oc_pack_read(_opb,ret-offs>>4);
324   }
325   return ret;
326 }
327
328 static int oc_block_run_unpack(oc_pack_buf *_opb){
329   /*Coding scheme:
330      Codeword             Run Length
331      0x                      1-2
332      10x                     3-4
333      110x                    5-6
334      1110xx                  7-10
335      11110xx                 11-14
336      11111xxxx               15-30*/
337   static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
338     5,
339      -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
340      -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
341      -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
342      -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
343      -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
344      -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
345      -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
346      33,       36,       39,       44,
347       1,-(1<<8|7),-(1<<8|8),
348       1,-(1<<8|9),-(1<<8|10),
349       2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
350       4,
351        -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
352        -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
353        -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
354        -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
355   };
356   return oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
357 }
358
359
360
361 void oc_dec_accel_init_c(oc_dec_ctx *_dec){
362 # if defined(OC_DEC_USE_VTABLE)
363   _dec->opt_vtable.dc_unpredict_mcu_plane=
364    oc_dec_dc_unpredict_mcu_plane_c;
365 # endif
366 }
367
368 static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
369  const th_setup_info *_setup){
370   int qti;
371   int pli;
372   int qi;
373   int ret;
374   ret=oc_state_init(&_dec->state,_info,3);
375   if(ret<0)return ret;
376   ret=oc_huff_trees_copy(_dec->huff_tables,
377    (const ogg_int16_t *const *)_setup->huff_tables);
378   if(ret<0){
379     oc_state_clear(&_dec->state);
380     return ret;
381   }
382   /*For each fragment, allocate one byte for every DCT coefficient token, plus
383      one byte for extra-bits for each token, plus one more byte for the long
384      EOB run, just in case it's the very last token and has a run length of
385      one.*/
386   _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
387    _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
388   if(_dec->dct_tokens==NULL){
389     oc_huff_trees_clear(_dec->huff_tables);
390     oc_state_clear(&_dec->state);
391     return TH_EFAULT;
392   }
393   for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
394     _dec->state.dequant_tables[qi][pli][qti]=
395      _dec->state.dequant_table_data[qi][pli][qti];
396   }
397   oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
398    &_setup->qinfo);
399   for(qi=0;qi<64;qi++){
400     int qsum;
401     qsum=0;
402     for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
403       qsum+=_dec->state.dequant_tables[qti][pli][qi][12]+
404        _dec->state.dequant_tables[qti][pli][qi][17]+
405        _dec->state.dequant_tables[qti][pli][qi][18]+
406        _dec->state.dequant_tables[qti][pli][qi][24]<<(pli==0);
407     }
408     _dec->pp_sharp_mod[qi]=-(qsum>>11);
409   }
410   memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
411    sizeof(_dec->state.loop_filter_limits));
412   oc_dec_accel_init(_dec);
413   _dec->pp_level=OC_PP_LEVEL_DISABLED;
414   _dec->dc_qis=NULL;
415   _dec->variances=NULL;
416   _dec->pp_frame_data=NULL;
417   _dec->stripe_cb.ctx=NULL;
418   _dec->stripe_cb.stripe_decoded=NULL;
419 #if defined(HAVE_CAIRO)
420   _dec->telemetry=0;
421   _dec->telemetry_bits=0;
422   _dec->telemetry_qi=0;
423   _dec->telemetry_mbmode=0;
424   _dec->telemetry_mv=0;
425   _dec->telemetry_frame_data=NULL;
426 #endif
427   return 0;
428 }
429
430 static void oc_dec_clear(oc_dec_ctx *_dec){
431 #if defined(HAVE_CAIRO)
432   _ogg_free(_dec->telemetry_frame_data);
433 #endif
434   _ogg_free(_dec->pp_frame_data);
435   _ogg_free(_dec->variances);
436   _ogg_free(_dec->dc_qis);
437   _ogg_free(_dec->dct_tokens);
438   oc_huff_trees_clear(_dec->huff_tables);
439   oc_state_clear(&_dec->state);
440 }
441
442
443 static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
444   long val;
445   /*Check to make sure this is a data packet.*/
446   val=oc_pack_read1(&_dec->opb);
447   if(val!=0)return TH_EBADPACKET;
448   /*Read in the frame type (I or P).*/
449   val=oc_pack_read1(&_dec->opb);
450   _dec->state.frame_type=(int)val;
451   /*Read in the qi list.*/
452   val=oc_pack_read(&_dec->opb,6);
453   _dec->state.qis[0]=(unsigned char)val;
454   val=oc_pack_read1(&_dec->opb);
455   if(!val)_dec->state.nqis=1;
456   else{
457     val=oc_pack_read(&_dec->opb,6);
458     _dec->state.qis[1]=(unsigned char)val;
459     val=oc_pack_read1(&_dec->opb);
460     if(!val)_dec->state.nqis=2;
461     else{
462       val=oc_pack_read(&_dec->opb,6);
463       _dec->state.qis[2]=(unsigned char)val;
464       _dec->state.nqis=3;
465     }
466   }
467   if(_dec->state.frame_type==OC_INTRA_FRAME){
468     /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
469       Most of the other unused bits in the VP3 headers were eliminated.
470       I don't know why these remain.*/
471     /*I wanted to eliminate wasted bits, but not all config wiggle room
472        --Monty.*/
473     val=oc_pack_read(&_dec->opb,3);
474     if(val!=0)return TH_EIMPL;
475   }
476   return 0;
477 }
478
479 /*Mark all fragments as coded and in OC_MODE_INTRA.
480   This also builds up the coded fragment list (in coded order), and clears the
481    uncoded fragment list.
482   It does not update the coded macro block list nor the super block flags, as
483    those are not used when decoding INTRA frames.*/
484 static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
485   const oc_sb_map   *sb_maps;
486   const oc_sb_flags *sb_flags;
487   oc_fragment       *frags;
488   ptrdiff_t         *coded_fragis;
489   ptrdiff_t          ncoded_fragis;
490   ptrdiff_t          prev_ncoded_fragis;
491   unsigned           nsbs;
492   unsigned           sbi;
493   int                pli;
494   coded_fragis=_dec->state.coded_fragis;
495   prev_ncoded_fragis=ncoded_fragis=0;
496   sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
497   sb_flags=_dec->state.sb_flags;
498   frags=_dec->state.frags;
499   sbi=nsbs=0;
500   for(pli=0;pli<3;pli++){
501     nsbs+=_dec->state.fplanes[pli].nsbs;
502     for(;sbi<nsbs;sbi++){
503       int quadi;
504       for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
505         int bi;
506         for(bi=0;bi<4;bi++){
507           ptrdiff_t fragi;
508           fragi=sb_maps[sbi][quadi][bi];
509           if(fragi>=0){
510             frags[fragi].coded=1;
511             frags[fragi].refi=OC_FRAME_SELF;
512             frags[fragi].mb_mode=OC_MODE_INTRA;
513             coded_fragis[ncoded_fragis++]=fragi;
514           }
515         }
516       }
517     }
518     _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
519     prev_ncoded_fragis=ncoded_fragis;
520   }
521   _dec->state.ntotal_coded_fragis=ncoded_fragis;
522 }
523
524 /*Decodes the bit flags indicating whether each super block is partially coded
525    or not.
526   Return: The number of partially coded super blocks.*/
527 static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
528   oc_sb_flags *sb_flags;
529   unsigned     nsbs;
530   unsigned     sbi;
531   unsigned     npartial;
532   unsigned     run_count;
533   long         val;
534   int          flag;
535   val=oc_pack_read1(&_dec->opb);
536   flag=(int)val;
537   sb_flags=_dec->state.sb_flags;
538   nsbs=_dec->state.nsbs;
539   sbi=npartial=0;
540   while(sbi<nsbs){
541     int full_run;
542     run_count=oc_sb_run_unpack(&_dec->opb);
543     full_run=run_count>=4129;
544     do{
545       sb_flags[sbi].coded_partially=flag;
546       sb_flags[sbi].coded_fully=0;
547       npartial+=flag;
548       sbi++;
549     }
550     while(--run_count>0&&sbi<nsbs);
551     if(full_run&&sbi<nsbs){
552       val=oc_pack_read1(&_dec->opb);
553       flag=(int)val;
554     }
555     else flag=!flag;
556   }
557   /*TODO: run_count should be 0 here.
558     If it's not, we should issue a warning of some kind.*/
559   return npartial;
560 }
561
562 /*Decodes the bit flags for whether or not each non-partially-coded super
563    block is fully coded or not.
564   This function should only be called if there is at least one
565    non-partially-coded super block.
566   Return: The number of partially coded super blocks.*/
567 static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
568   oc_sb_flags *sb_flags;
569   unsigned     nsbs;
570   unsigned     sbi;
571   unsigned     run_count;
572   long         val;
573   int          flag;
574   sb_flags=_dec->state.sb_flags;
575   nsbs=_dec->state.nsbs;
576   /*Skip partially coded super blocks.*/
577   for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
578   val=oc_pack_read1(&_dec->opb);
579   flag=(int)val;
580   do{
581     int full_run;
582     run_count=oc_sb_run_unpack(&_dec->opb);
583     full_run=run_count>=4129;
584     for(;sbi<nsbs;sbi++){
585       if(sb_flags[sbi].coded_partially)continue;
586       if(run_count--<=0)break;
587       sb_flags[sbi].coded_fully=flag;
588     }
589     if(full_run&&sbi<nsbs){
590       val=oc_pack_read1(&_dec->opb);
591       flag=(int)val;
592     }
593     else flag=!flag;
594   }
595   while(sbi<nsbs);
596   /*TODO: run_count should be 0 here.
597     If it's not, we should issue a warning of some kind.*/
598 }
599
600 static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
601   const oc_sb_map   *sb_maps;
602   const oc_sb_flags *sb_flags;
603   signed char       *mb_modes;
604   oc_fragment       *frags;
605   unsigned           nsbs;
606   unsigned           sbi;
607   unsigned           npartial;
608   long               val;
609   int                pli;
610   int                flag;
611   int                run_count;
612   ptrdiff_t         *coded_fragis;
613   ptrdiff_t         *uncoded_fragis;
614   ptrdiff_t          ncoded_fragis;
615   ptrdiff_t          nuncoded_fragis;
616   ptrdiff_t          prev_ncoded_fragis;
617   npartial=oc_dec_partial_sb_flags_unpack(_dec);
618   if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
619   if(npartial>0){
620     val=oc_pack_read1(&_dec->opb);
621     flag=!(int)val;
622   }
623   else flag=0;
624   sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
625   sb_flags=_dec->state.sb_flags;
626   mb_modes=_dec->state.mb_modes;
627   frags=_dec->state.frags;
628   sbi=nsbs=run_count=0;
629   coded_fragis=_dec->state.coded_fragis;
630   uncoded_fragis=coded_fragis+_dec->state.nfrags;
631   prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0;
632   for(pli=0;pli<3;pli++){
633     nsbs+=_dec->state.fplanes[pli].nsbs;
634     for(;sbi<nsbs;sbi++){
635       int quadi;
636       for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
637         int quad_coded;
638         int bi;
639         quad_coded=0;
640         for(bi=0;bi<4;bi++){
641           ptrdiff_t fragi;
642           fragi=sb_maps[sbi][quadi][bi];
643           if(fragi>=0){
644             int coded;
645             if(sb_flags[sbi].coded_fully)coded=1;
646             else if(!sb_flags[sbi].coded_partially)coded=0;
647             else{
648               if(run_count<=0){
649                 run_count=oc_block_run_unpack(&_dec->opb);
650                 flag=!flag;
651               }
652               run_count--;
653               coded=flag;
654             }
655             if(coded)coded_fragis[ncoded_fragis++]=fragi;
656             else *(uncoded_fragis-++nuncoded_fragis)=fragi;
657             quad_coded|=coded;
658             frags[fragi].coded=coded;
659             frags[fragi].refi=OC_FRAME_NONE;
660           }
661         }
662         /*Remember if there's a coded luma block in this macro block.*/
663         if(!pli)mb_modes[sbi<<2|quadi]=quad_coded;
664       }
665     }
666     _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
667     prev_ncoded_fragis=ncoded_fragis;
668   }
669   _dec->state.ntotal_coded_fragis=ncoded_fragis;
670   /*TODO: run_count should be 0 here.
671     If it's not, we should issue a warning of some kind.*/
672 }
673
674
/*The Huffman trees used to decode a mode index, which
   oc_dec_mb_modes_unpack() then maps to a macro block mode through the
   current alphabet.
  NOTE(review): The layout appears to be: each node starts with the number of
   bits to look up, followed by one entry for every value of those bits;
   negative entries are leaves packing -(code length<<8|value), and positive
   entries are the array index of a child node (17 is the index of the second
   node in the first tree).
  Confirm against oc_huff_token_decode().*/
675 /*Coding scheme:
676    Codeword            Mode Index
677    0                       0
678    10                      1
679    110                     2
680    1110                    3
681    11110                   4
682    111110                  5
683    1111110                 6
684    1111111                 7*/
685 static const ogg_int16_t OC_VLC_MODE_TREE[26]={
686   4,
687    -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
688    -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
689    -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
690    -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
691     3,
692      -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
693      -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
694 };
695
/*The fixed-length alternative: every mode index is a plain 3-bit code.
  oc_dec_mb_modes_unpack() selects this tree for mode scheme 7.*/
696 static const ogg_int16_t OC_CLC_MODE_TREE[9]={
697   3,
698    -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
699    -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
700 };
701
702 /*Unpacks the list of macro block modes for INTER frames.*/
703 static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
704   signed char         *mb_modes;
705   const unsigned char *alphabet;
706   unsigned char        scheme0_alphabet[8];
707   const ogg_int16_t   *mode_tree;
708   size_t               nmbs;
709   size_t               mbi;
710   long                 val;
711   int                  mode_scheme;
712   val=oc_pack_read(&_dec->opb,3);
713   mode_scheme=(int)val;
714   if(mode_scheme==0){
715     int mi;
716     /*Just in case, initialize the modes to something.
717       If the bitstream doesn't contain each index exactly once, it's likely
718        corrupt and the rest of the packet is garbage anyway, but this way we
719        won't crash, and we'll decode SOMETHING.*/
720     /*LOOP VECTORIZES*/
721     for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
722     for(mi=0;mi<OC_NMODES;mi++){
723       val=oc_pack_read(&_dec->opb,3);
724       scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
725     }
726     alphabet=scheme0_alphabet;
727   }
728   else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
729   mode_tree=mode_scheme==7?OC_CLC_MODE_TREE:OC_VLC_MODE_TREE;
730   mb_modes=_dec->state.mb_modes;
731   nmbs=_dec->state.nmbs;
732   for(mbi=0;mbi<nmbs;mbi++){
733     if(mb_modes[mbi]>0){
734       /*We have a coded luma block; decode a mode.*/
735       mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
736     }
737     /*For other valid macro blocks, INTER_NOMV is forced, but we rely on the
738        fact that OC_MODE_INTER_NOMV is already 0.*/
739   }
740 }
741
742
743
/*The Huffman trees used to decode a single motion vector component.
  The value stored in each leaf is the component biased by 32; oc_mv_unpack()
   subtracts the 32 again after decoding.
  This is the variable-length code: the leaves here cover the components
   -31...31, with shorter codes for the smaller magnitudes.
  NOTE(review): The node layout appears to be a bit count followed by one
   entry per bit pattern, with negative entries being leaves packing
   -(code length<<8|value) and positive entries being the array index of a
   child node; confirm against oc_huff_token_decode().*/
744 static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
745   5,
746    -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
747    -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
748    -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
749    -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
750    -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
751    33,          36,          39,          42,
752    45,          50,          55,          60,
753    65,          74,          83,          92,
754     1,-(1<<8|32+4),-(1<<8|32-4),
755     1,-(1<<8|32+5),-(1<<8|32-5),
756     1,-(1<<8|32+6),-(1<<8|32-6),
757     1,-(1<<8|32+7),-(1<<8|32-7),
758     2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
759     2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
760     2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
761     2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
762     3,
763      -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
764      -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
765     3,
766      -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
767      -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
768     3,
769      -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
770      -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
771     3,
772      -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
773      -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
774 };
775
/*The fixed-length alternative: a single node of 6-bit codes with the same
   bias of 32.
  The entries alternate between +k and -k for k=0...31, so entry i (counting
   from 0 after the bit count) has magnitude i>>1 and is negative when i is
   odd.*/
776 static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
777   6,
778    -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
779    -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
780    -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
781    -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
782    -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
783    -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
784    -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
785    -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
786    -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
787    -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
788    -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
789    -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
790    -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
791    -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
792    -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
793    -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
794 };
795
796
797 static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
798   int dx;
799   int dy;
800   dx=oc_huff_token_decode(_opb,_tree)-32;
801   dy=oc_huff_token_decode(_opb,_tree)-32;
802   return OC_MV(dx,dy);
803 }
804
805 /*Unpacks the list of motion vectors for INTER frames, and propagates the macro
806    block modes and motion vectors to the individual fragments.*/
807 static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
808   const oc_mb_map        *mb_maps;
809   const signed char      *mb_modes;
810   oc_set_chroma_mvs_func  set_chroma_mvs;
811   const ogg_int16_t      *mv_comp_tree;
812   oc_fragment            *frags;
813   oc_mv                  *frag_mvs;
814   const unsigned char    *map_idxs;
815   int                     map_nidxs;
816   oc_mv                   last_mv;
817   oc_mv                   prior_mv;
818   oc_mv                   cbmvs[4];
819   size_t                  nmbs;
820   size_t                  mbi;
821   long                    val;
822   set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
823   val=oc_pack_read1(&_dec->opb);
824   mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
825   map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
826   map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
827   prior_mv=last_mv=0;
828   frags=_dec->state.frags;
829   frag_mvs=_dec->state.frag_mvs;
830   mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
831   mb_modes=_dec->state.mb_modes;
832   nmbs=_dec->state.nmbs;
833   for(mbi=0;mbi<nmbs;mbi++){
834     int mb_mode;
835     mb_mode=mb_modes[mbi];
836     if(mb_mode!=OC_MODE_INVALID){
837       oc_mv     mbmv;
838       ptrdiff_t fragi;
839       int       mapi;
840       int       mapii;
841       int       refi;
842       if(mb_mode==OC_MODE_INTER_MV_FOUR){
843         oc_mv lbmvs[4];
844         int   bi;
845         prior_mv=last_mv;
846         for(bi=0;bi<4;bi++){
847           fragi=mb_maps[mbi][0][bi];
848           if(frags[fragi].coded){
849             frags[fragi].refi=OC_FRAME_PREV;
850             frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
851             lbmvs[bi]=last_mv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
852             frag_mvs[fragi]=lbmvs[bi];
853           }
854           else lbmvs[bi]=0;
855         }
856         (*set_chroma_mvs)(cbmvs,lbmvs);
857         for(mapii=4;mapii<map_nidxs;mapii++){
858           mapi=map_idxs[mapii];
859           bi=mapi&3;
860           fragi=mb_maps[mbi][mapi>>2][bi];
861           if(frags[fragi].coded){
862             frags[fragi].refi=OC_FRAME_PREV;
863             frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
864             frag_mvs[fragi]=cbmvs[bi];
865           }
866         }
867       }
868       else{
869         switch(mb_mode){
870           case OC_MODE_INTER_MV:{
871             prior_mv=last_mv;
872             last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
873           }break;
874           case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
875           case OC_MODE_INTER_MV_LAST2:{
876             mbmv=prior_mv;
877             prior_mv=last_mv;
878             last_mv=mbmv;
879           }break;
880           case OC_MODE_GOLDEN_MV:{
881             mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
882           }break;
883           default:mbmv=0;break;
884         }
885         /*Fill in the MVs for the fragments.*/
886         refi=OC_FRAME_FOR_MODE(mb_mode);
887         mapii=0;
888         do{
889           mapi=map_idxs[mapii];
890           fragi=mb_maps[mbi][mapi>>2][mapi&3];
891           if(frags[fragi].coded){
892             frags[fragi].refi=refi;
893             frags[fragi].mb_mode=mb_mode;
894             frag_mvs[fragi]=mbmv;
895           }
896         }
897         while(++mapii<map_nidxs);
898       }
899     }
900   }
901 }
902
903 static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
904   oc_fragment     *frags;
905   const ptrdiff_t *coded_fragis;
906   ptrdiff_t        ncoded_fragis;
907   ptrdiff_t        fragii;
908   ptrdiff_t        fragi;
909   ncoded_fragis=_dec->state.ntotal_coded_fragis;
910   if(ncoded_fragis<=0)return;
911   frags=_dec->state.frags;
912   coded_fragis=_dec->state.coded_fragis;
913   if(_dec->state.nqis==1){
914     /*If this frame has only a single qi value, then just use it for all coded
915        fragments.*/
916     for(fragii=0;fragii<ncoded_fragis;fragii++){
917       frags[coded_fragis[fragii]].qii=0;
918     }
919   }
920   else{
921     long val;
922     int  flag;
923     int  nqi1;
924     int  run_count;
925     /*Otherwise, we decode a qi index for each fragment, using two passes of
926       the same binary RLE scheme used for super-block coded bits.
927      The first pass marks each fragment as having a qii of 0 or greater than
928       0, and the second pass (if necessary), distinguishes between a qii of
929       1 and 2.
930      At first we just store the qii in the fragment.
931      After all the qii's are decoded, we make a final pass to replace them
932       with the corresponding qi's for this frame.*/
933     val=oc_pack_read1(&_dec->opb);
934     flag=(int)val;
935     nqi1=0;
936     fragii=0;
937     while(fragii<ncoded_fragis){
938       int full_run;
939       run_count=oc_sb_run_unpack(&_dec->opb);
940       full_run=run_count>=4129;
941       do{
942         frags[coded_fragis[fragii++]].qii=flag;
943         nqi1+=flag;
944       }
945       while(--run_count>0&&fragii<ncoded_fragis);
946       if(full_run&&fragii<ncoded_fragis){
947         val=oc_pack_read1(&_dec->opb);
948         flag=(int)val;
949       }
950       else flag=!flag;
951     }
952     /*TODO: run_count should be 0 here.
953       If it's not, we should issue a warning of some kind.*/
954     /*If we have 3 different qi's for this frame, and there was at least one
955        fragment with a non-zero qi, make the second pass.*/
956     if(_dec->state.nqis==3&&nqi1>0){
957       /*Skip qii==0 fragments.*/
958       for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++);
959       val=oc_pack_read1(&_dec->opb);
960       flag=(int)val;
961       do{
962         int full_run;
963         run_count=oc_sb_run_unpack(&_dec->opb);
964         full_run=run_count>=4129;
965         for(;fragii<ncoded_fragis;fragii++){
966           fragi=coded_fragis[fragii];
967           if(frags[fragi].qii==0)continue;
968           if(run_count--<=0)break;
969           frags[fragi].qii+=flag;
970         }
971         if(full_run&&fragii<ncoded_fragis){
972           val=oc_pack_read1(&_dec->opb);
973           flag=(int)val;
974         }
975         else flag=!flag;
976       }
977       while(fragii<ncoded_fragis);
978       /*TODO: run_count should be 0 here.
979         If it's not, we should issue a warning of some kind.*/
980     }
981   }
982 }
983
984
985
986 /*Unpacks the DC coefficient tokens.
987   Unlike when unpacking the AC coefficient tokens, we actually need to decode
988    the DC coefficient values now so that we can do DC prediction.
989   _huff_idx:   The index of the Huffman table to use for each color plane.
990   _ntoks_left: The number of tokens left to be decoded in each color plane for
991                 each coefficient.
992                This is updated as EOB tokens and zero run tokens are decoded.
993   Return: The length of any outstanding EOB run.*/
994 static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
995  ptrdiff_t _ntoks_left[3][64]){
996   unsigned char   *dct_tokens;
997   oc_fragment     *frags;
998   const ptrdiff_t *coded_fragis;
999   ptrdiff_t        ncoded_fragis;
1000   ptrdiff_t        fragii;
1001   ptrdiff_t        eobs;
1002   ptrdiff_t        ti;
1003   int              pli;
1004   dct_tokens=_dec->dct_tokens;
1005   frags=_dec->state.frags;
1006   coded_fragis=_dec->state.coded_fragis;
1007   ncoded_fragis=fragii=eobs=ti=0;
1008   for(pli=0;pli<3;pli++){
1009     ptrdiff_t run_counts[64];
1010     ptrdiff_t eob_count;
1011     ptrdiff_t eobi;
1012     int       rli;
1013     ncoded_fragis+=_dec->state.ncoded_fragis[pli];
1014     memset(run_counts,0,sizeof(run_counts));
1015     _dec->eob_runs[pli][0]=eobs;
1016     _dec->ti0[pli][0]=ti;
1017     /*Continue any previous EOB run, if there was one.*/
1018     eobi=eobs;
1019     if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
1020     eob_count=eobi;
1021     eobs-=eobi;
1022     while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1023     while(fragii<ncoded_fragis){
1024       int token;
1025       int cw;
1026       int eb;
1027       int skip;
1028       token=oc_huff_token_decode(&_dec->opb,
1029        _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1030       dct_tokens[ti++]=(unsigned char)token;
1031       if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1032         eb=(int)oc_pack_read(&_dec->opb,
1033          OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1034         dct_tokens[ti++]=(unsigned char)eb;
1035         if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1036         eb<<=OC_DCT_TOKEN_EB_POS(token);
1037       }
1038       else eb=0;
1039       cw=OC_DCT_CODE_WORD[token]+eb;
1040       eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1041       if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
1042       if(eobs){
1043         eobi=OC_MINI(eobs,ncoded_fragis-fragii);
1044         eob_count+=eobi;
1045         eobs-=eobi;
1046         while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1047       }
1048       else{
1049         int coeff;
1050         skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1051         cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1052         coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1053         if(skip)coeff=0;
1054         run_counts[skip]++;
1055         frags[coded_fragis[fragii++]].dc=coeff;
1056       }
1057     }
1058     /*Add the total EOB count to the longest run length.*/
1059     run_counts[63]+=eob_count;
1060     /*And convert the run_counts array to a moment table.*/
1061     for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1062     /*Finally, subtract off the number of coefficients that have been
1063        accounted for by runs started in this coefficient.*/
1064     for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
1065   }
1066   _dec->dct_tokens_count=ti;
1067   return eobs;
1068 }
1069
1070 /*Unpacks the AC coefficient tokens.
1071   This can completely discard coefficient values while unpacking, and so is
1072    somewhat simpler than unpacking the DC coefficient tokens.
1073   _huff_idx:   The index of the Huffman table to use for each color plane.
1074   _ntoks_left: The number of tokens left to be decoded in each color plane for
1075                 each coefficient.
1076                This is updated as EOB tokens and zero run tokens are decoded.
1077   _eobs:       The length of any outstanding EOB run from previous
1078                 coefficients.
1079   Return: The length of any outstanding EOB run.*/
1080 static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
1081  ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
1082   unsigned char *dct_tokens;
1083   ptrdiff_t      ti;
1084   int            pli;
1085   dct_tokens=_dec->dct_tokens;
1086   ti=_dec->dct_tokens_count;
1087   for(pli=0;pli<3;pli++){
1088     ptrdiff_t run_counts[64];
1089     ptrdiff_t eob_count;
1090     size_t    ntoks_left;
1091     size_t    ntoks;
1092     int       rli;
1093     _dec->eob_runs[pli][_zzi]=_eobs;
1094     _dec->ti0[pli][_zzi]=ti;
1095     ntoks_left=_ntoks_left[pli][_zzi];
1096     memset(run_counts,0,sizeof(run_counts));
1097     eob_count=0;
1098     ntoks=0;
1099     while(ntoks+_eobs<ntoks_left){
1100       int token;
1101       int cw;
1102       int eb;
1103       int skip;
1104       ntoks+=_eobs;
1105       eob_count+=_eobs;
1106       token=oc_huff_token_decode(&_dec->opb,
1107        _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1108       dct_tokens[ti++]=(unsigned char)token;
1109       if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1110         eb=(int)oc_pack_read(&_dec->opb,
1111          OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1112         dct_tokens[ti++]=(unsigned char)eb;
1113         if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1114         eb<<=OC_DCT_TOKEN_EB_POS(token);
1115       }
1116       else eb=0;
1117       cw=OC_DCT_CODE_WORD[token]+eb;
1118       skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1119       _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1120       if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
1121       if(_eobs==0){
1122         run_counts[skip]++;
1123         ntoks++;
1124       }
1125     }
1126     /*Add the portion of the last EOB run actually used by this coefficient.*/
1127     eob_count+=ntoks_left-ntoks;
1128     /*And remove it from the remaining EOB count.*/
1129     _eobs-=ntoks_left-ntoks;
1130     /*Add the total EOB count to the longest run length.*/
1131     run_counts[63]+=eob_count;
1132     /*And convert the run_counts array to a moment table.*/
1133     for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1134     /*Finally, subtract off the number of coefficients that have been
1135        accounted for by runs started in this coefficient.*/
1136     for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
1137   }
1138   _dec->dct_tokens_count=ti;
1139   return _eobs;
1140 }
1141
1142 /*Tokens describing the DCT coefficients that belong to each fragment are
1143    stored in the bitstream grouped by coefficient, not by fragment.
1144
1145   This means that we either decode all the tokens in order, building up a
1146    separate coefficient list for each fragment as we go, and then go back and
1147    do the iDCT on each fragment, or we have to create separate lists of tokens
1148    for each coefficient, so that we can pull the next token required off the
1149    head of the appropriate list when decoding a specific fragment.
1150
1151   The former was VP3's choice, and it meant 2*w*h extra storage for all the
1152    decoded coefficient values.
1153
1154   We take the second option, which lets us store just one to three bytes per
1155    token (generally far fewer than the number of coefficients, due to EOB
1156    tokens and zero runs), and which requires us to only maintain a counter for
1157    each of the 64 coefficients, instead of a counter for every fragment to
1158    determine where the next token goes.
1159
1160   We actually use 3 counters per coefficient, one for each color plane, so we
1161    can decode all color planes simultaneously.
1162   This lets color conversion, etc., be done as soon as a full MCU (one or
1163    two super block rows) is decoded, while the image data is still in cache.*/
1164
1165 static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
1166   static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
1167   ptrdiff_t  ntoks_left[3][64];
1168   int        huff_idxs[2];
1169   ptrdiff_t  eobs;
1170   long       val;
1171   int        pli;
1172   int        zzi;
1173   int        hgi;
1174   for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
1175     ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
1176   }
1177   val=oc_pack_read(&_dec->opb,4);
1178   huff_idxs[0]=(int)val;
1179   val=oc_pack_read(&_dec->opb,4);
1180   huff_idxs[1]=(int)val;
1181   _dec->eob_runs[0][0]=0;
1182   eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
1183 #if defined(HAVE_CAIRO)
1184   _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
1185 #endif
1186   val=oc_pack_read(&_dec->opb,4);
1187   huff_idxs[0]=(int)val;
1188   val=oc_pack_read(&_dec->opb,4);
1189   huff_idxs[1]=(int)val;
1190   zzi=1;
1191   for(hgi=1;hgi<5;hgi++){
1192     huff_idxs[0]+=16;
1193     huff_idxs[1]+=16;
1194     for(;zzi<OC_HUFF_LIST_MAX[hgi];zzi++){
1195       eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
1196     }
1197   }
1198   /*TODO: eobs should be exactly zero, or 4096 or greater.
1199     The second case occurs when an EOB run of size zero is encountered, which
1200      gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
1201     If neither of these conditions holds, then a warning should be issued.*/
1202 }
1203
1204
1205 static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
1206   /*pp_level 0: disabled; free any memory used and return*/
1207   if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
1208     if(_dec->dc_qis!=NULL){
1209       _ogg_free(_dec->dc_qis);
1210       _dec->dc_qis=NULL;
1211       _ogg_free(_dec->variances);
1212       _dec->variances=NULL;
1213       _ogg_free(_dec->pp_frame_data);
1214       _dec->pp_frame_data=NULL;
1215     }
1216     return 1;
1217   }
1218   if(_dec->dc_qis==NULL){
1219     /*If we haven't been tracking DC quantization indices, there's no point in
1220        starting now.*/
1221     if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
1222     _dec->dc_qis=(unsigned char *)_ogg_malloc(
1223      _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
1224     if(_dec->dc_qis==NULL)return 1;
1225     memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
1226   }
1227   else{
1228     unsigned char   *dc_qis;
1229     const ptrdiff_t *coded_fragis;
1230     ptrdiff_t        ncoded_fragis;
1231     ptrdiff_t        fragii;
1232     unsigned char    qi0;
1233     /*Update the DC quantization index of each coded block.*/
1234     dc_qis=_dec->dc_qis;
1235     coded_fragis=_dec->state.coded_fragis;
1236     ncoded_fragis=_dec->state.ncoded_fragis[0]+
1237      _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
1238     qi0=(unsigned char)_dec->state.qis[0];
1239     for(fragii=0;fragii<ncoded_fragis;fragii++){
1240       dc_qis[coded_fragis[fragii]]=qi0;
1241     }
1242   }
1243   /*pp_level 1: Stop after updating DC quantization indices.*/
1244   if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
1245     if(_dec->variances!=NULL){
1246       _ogg_free(_dec->variances);
1247       _dec->variances=NULL;
1248       _ogg_free(_dec->pp_frame_data);
1249       _dec->pp_frame_data=NULL;
1250     }
1251     return 1;
1252   }
1253   if(_dec->variances==NULL){
1254     size_t frame_sz;
1255     size_t c_sz;
1256     int    c_w;
1257     int    c_h;
1258     frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
1259     c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
1260     c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
1261     c_sz=c_w*(size_t)c_h;
1262     /*Allocate space for the chroma planes, even if we're not going to use
1263        them; this simplifies allocation state management, though it may waste
1264        memory on the few systems that don't overcommit pages.*/
1265     frame_sz+=c_sz<<1;
1266     _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
1267      frame_sz*sizeof(_dec->pp_frame_data[0]));
1268     _dec->variances=(int *)_ogg_malloc(
1269      _dec->state.nfrags*sizeof(_dec->variances[0]));
1270     if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
1271       _ogg_free(_dec->pp_frame_data);
1272       _dec->pp_frame_data=NULL;
1273       _ogg_free(_dec->variances);
1274       _dec->variances=NULL;
1275       return 1;
1276     }
1277     /*Force an update of the PP buffer pointers.*/
1278     _dec->pp_frame_state=0;
1279   }
1280   /*Update the PP buffer pointers if necessary.*/
1281   if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
1282     if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
1283       /*If chroma processing is disabled, just use the PP luma plane.*/
1284       _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
1285       _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
1286       _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
1287       _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
1288        (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
1289     }
1290     else{
1291       size_t y_sz;
1292       size_t c_sz;
1293       int    c_w;
1294       int    c_h;
1295       /*Otherwise, set up pointers to all three PP planes.*/
1296       y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
1297       c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
1298       c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
1299       c_sz=c_w*(size_t)c_h;
1300       _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
1301       _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
1302       _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
1303       _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
1304       _dec->pp_frame_buf[1].width=c_w;
1305       _dec->pp_frame_buf[1].height=c_h;
1306       _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
1307       _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
1308       _dec->pp_frame_buf[2].width=c_w;
1309       _dec->pp_frame_buf[2].height=c_h;
1310       _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
1311       _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
1312       oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
1313     }
1314     _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
1315   }
1316   /*If we're not processing chroma, copy the reference frame's chroma planes.*/
1317   if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
1318     memcpy(_dec->pp_frame_buf+1,
1319      _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
1320      sizeof(_dec->pp_frame_buf[1])*2);
1321   }
1322   return 0;
1323 }
1324
1325
1326 /*Initialize the main decoding pipeline.*/
1327 static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
1328  oc_dec_pipeline_state *_pipe){
1329   const ptrdiff_t *coded_fragis;
1330   const ptrdiff_t *uncoded_fragis;
1331   int              flimit;
1332   int              pli;
1333   int              qii;
1334   int              qti;
1335   int              zzi;
1336   /*If chroma is sub-sampled in the vertical direction, we have to decode two
1337      super block rows of Y' for each super block row of Cb and Cr.*/
1338   _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
1339   /*Initialize the token and extra bits indices for each plane and
1340      coefficient.*/
1341   memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
1342   /*Also copy over the initial the EOB run counts.*/
1343   memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
1344   /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
1345   coded_fragis=_dec->state.coded_fragis;
1346   uncoded_fragis=coded_fragis+_dec->state.nfrags;
1347   for(pli=0;pli<3;pli++){
1348     ptrdiff_t ncoded_fragis;
1349     _pipe->coded_fragis[pli]=coded_fragis;
1350     _pipe->uncoded_fragis[pli]=uncoded_fragis;
1351     ncoded_fragis=_dec->state.ncoded_fragis[pli];
1352     coded_fragis+=ncoded_fragis;
1353     uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
1354   }
1355   /*Set up condensed quantizer tables.*/
1356   for(pli=0;pli<3;pli++){
1357     for(qii=0;qii<_dec->state.nqis;qii++){
1358       for(qti=0;qti<2;qti++){
1359         _pipe->dequant[pli][qii][qti]=
1360          _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
1361       }
1362     }
1363   }
1364   /*Set the previous DC predictor to 0 for all color planes and frame types.*/
1365   memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
1366   /*Initialize the bounding value array for the loop filter.*/
1367   flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
1368   _pipe->loop_filter=flimit!=0;
1369   if(flimit!=0)oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
1370   /*Initialize any buffers needed for post-processing.
1371     We also save the current post-processing level, to guard against the user
1372      changing it from a callback.*/
1373   if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
1374   /*If we don't have enough information to post-process, disable it, regardless
1375      of the user-requested level.*/
1376   else{
1377     _pipe->pp_level=OC_PP_LEVEL_DISABLED;
1378     memcpy(_dec->pp_frame_buf,
1379      _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
1380      sizeof(_dec->pp_frame_buf[0])*3);
1381   }
1382   /*Clear down the DCT coefficient buffer for the first block.*/
1383   for(zzi=0;zzi<64;zzi++)_pipe->dct_coeffs[zzi]=0;
1384 }
1385
1386 /*Undo the DC prediction in a single plane of an MCU (one or two super block
1387    rows).
1388   As a side effect, the number of coded and uncoded fragments in this plane of
1389    the MCU is also computed.*/
1390 void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
1391  oc_dec_pipeline_state *_pipe,int _pli){
1392   const oc_fragment_plane *fplane;
1393   oc_fragment             *frags;
1394   int                     *pred_last;
1395   ptrdiff_t                ncoded_fragis;
1396   ptrdiff_t                fragi;
1397   int                      fragx;
1398   int                      fragy;
1399   int                      fragy0;
1400   int                      fragy_end;
1401   int                      nhfrags;
1402   /*Compute the first and last fragment row of the current MCU for this
1403      plane.*/
1404   fplane=_dec->state.fplanes+_pli;
1405   fragy0=_pipe->fragy0[_pli];
1406   fragy_end=_pipe->fragy_end[_pli];
1407   nhfrags=fplane->nhfrags;
1408   pred_last=_pipe->pred_last[_pli];
1409   frags=_dec->state.frags;
1410   ncoded_fragis=0;
1411   fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
1412   for(fragy=fragy0;fragy<fragy_end;fragy++){
1413     if(fragy==0){
1414       /*For the first row, all of the cases reduce to just using the previous
1415          predictor for the same reference frame.*/
1416       for(fragx=0;fragx<nhfrags;fragx++,fragi++){
1417         if(frags[fragi].coded){
1418           int refi;
1419           refi=frags[fragi].refi;
1420           pred_last[refi]=frags[fragi].dc+=pred_last[refi];
1421           ncoded_fragis++;
1422         }
1423       }
1424     }
1425     else{
1426       oc_fragment *u_frags;
1427       int          l_ref;
1428       int          ul_ref;
1429       int          u_ref;
1430       u_frags=frags-nhfrags;
1431       l_ref=-1;
1432       ul_ref=-1;
1433       u_ref=u_frags[fragi].refi;
1434       for(fragx=0;fragx<nhfrags;fragx++,fragi++){
1435         int ur_ref;
1436         if(fragx+1>=nhfrags)ur_ref=-1;
1437         else ur_ref=u_frags[fragi+1].refi;
1438         if(frags[fragi].coded){
1439           int pred;
1440           int refi;
1441           refi=frags[fragi].refi;
1442           /*We break out a separate case based on which of our neighbors use
1443              the same reference frames.
1444             This is somewhat faster than trying to make a generic case which
1445              handles all of them, since it reduces lots of poorly predicted
1446              jumps to one switch statement, and also lets a number of the
1447              multiplications be optimized out by strength reduction.*/
1448           switch((l_ref==refi)|(ul_ref==refi)<<1|
1449            (u_ref==refi)<<2|(ur_ref==refi)<<3){
1450             default:pred=pred_last[refi];break;
1451             case  1:
1452             case  3:pred=frags[fragi-1].dc;break;
1453             case  2:pred=u_frags[fragi-1].dc;break;
1454             case  4:
1455             case  6:
1456             case 12:pred=u_frags[fragi].dc;break;
1457             case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
1458             case  8:pred=u_frags[fragi+1].dc;break;
1459             case  9:
1460             case 11:
1461             case 13:{
1462               /*The TI compiler mis-compiles this line.*/
1463               pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
1464             }break;
1465             case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
1466             case 14:{
1467               pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
1468                +10*u_frags[fragi].dc)/16;
1469             }break;
1470             case  7:
1471             case 15:{
1472               int p0;
1473               int p1;
1474               int p2;
1475               p0=frags[fragi-1].dc;
1476               p1=u_frags[fragi-1].dc;
1477               p2=u_frags[fragi].dc;
1478               pred=(29*(p0+p2)-26*p1)/32;
1479               if(abs(pred-p2)>128)pred=p2;
1480               else if(abs(pred-p0)>128)pred=p0;
1481               else if(abs(pred-p1)>128)pred=p1;
1482             }break;
1483           }
1484           pred_last[refi]=frags[fragi].dc+=pred;
1485           ncoded_fragis++;
1486           l_ref=refi;
1487         }
1488         else l_ref=-1;
1489         ul_ref=u_ref;
1490         u_ref=ur_ref;
1491       }
1492     }
1493   }
1494   _pipe->ncoded_fragis[_pli]=ncoded_fragis;
1495   /*Also save the number of uncoded fragments so we know how many to copy.*/
1496   _pipe->nuncoded_fragis[_pli]=
1497    (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
1498 }
1499
1500 /*Reconstructs all coded fragments in a single MCU (one or two super block
1501    rows).
1502   This requires that each coded fragment have a proper macro block mode and
1503    motion vector (if not in INTRA mode), and have its DC value decoded, with
1504    the DC prediction process reversed, and the number of coded and uncoded
1505    fragments in this plane of the MCU be counted.
1506   The token lists for each color plane and coefficient should also be filled
1507    in, along with initial token offsets, extra bits offsets, and EOB run
1508    counts.*/
1509 static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
1510  oc_dec_pipeline_state *_pipe,int _pli){
1511   unsigned char       *dct_tokens;
1512   const unsigned char *dct_fzig_zag;
1513   ogg_uint16_t         dc_quant[2];
1514   const oc_fragment   *frags;
1515   const ptrdiff_t     *coded_fragis;
1516   ptrdiff_t            ncoded_fragis;
1517   ptrdiff_t            fragii;
1518   ptrdiff_t           *ti;
1519   ptrdiff_t           *eob_runs;
1520   int                  qti;
1521   dct_tokens=_dec->dct_tokens;
1522   dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
1523   frags=_dec->state.frags;
1524   coded_fragis=_pipe->coded_fragis[_pli];
1525   ncoded_fragis=_pipe->ncoded_fragis[_pli];
1526   ti=_pipe->ti[_pli];
1527   eob_runs=_pipe->eob_runs[_pli];
1528   for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
1529   for(fragii=0;fragii<ncoded_fragis;fragii++){
1530     const ogg_uint16_t *ac_quant;
1531     ptrdiff_t           fragi;
1532     int                 last_zzi;
1533     int                 zzi;
1534     fragi=coded_fragis[fragii];
1535     qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
1536     ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
1537     /*Decode the AC coefficients.*/
1538     for(zzi=0;zzi<64;){
1539       int token;
1540       last_zzi=zzi;
1541       if(eob_runs[zzi]){
1542         eob_runs[zzi]--;
1543         break;
1544       }
1545       else{
1546         ptrdiff_t eob;
1547         int       cw;
1548         int       rlen;
1549         int       coeff;
1550         int       lti;
1551         lti=ti[zzi];
1552         token=dct_tokens[lti++];
1553         cw=OC_DCT_CODE_WORD[token];
1554         /*These parts could be done branchless, but the branches are fairly
1555            predictable and the C code translates into more than a few
1556            instructions, so it's worth it to avoid them.*/
1557         if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1558           cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
1559         }
1560         eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1561         if(token==OC_DCT_TOKEN_FAT_EOB){
1562           eob+=dct_tokens[lti++]<<8;
1563           if(eob==0)eob=OC_DCT_EOB_FINISH;
1564         }
1565         rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1566         cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1567         coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1568         eob_runs[zzi]=eob;
1569         ti[zzi]=lti;
1570         zzi+=rlen;
1571         _pipe->dct_coeffs[dct_fzig_zag[zzi]]=
1572          (ogg_int16_t)(coeff*(int)ac_quant[zzi]);
1573         zzi+=!eob;
1574       }
1575     }
1576     /*TODO: zzi should be exactly 64 here.
1577       If it's not, we should report some kind of warning.*/
1578     zzi=OC_MINI(zzi,64);
1579     _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
1580     /*last_zzi is always initialized.
1581       If your compiler thinks otherwise, it is dumb.*/
1582     oc_state_frag_recon(&_dec->state,fragi,_pli,
1583      _pipe->dct_coeffs,last_zzi,dc_quant[qti]);
1584   }
1585   _pipe->coded_fragis[_pli]+=ncoded_fragis;
1586   /*Right now the reconstructed MCU has only the coded blocks in it.*/
1587   /*TODO: We make the decision here to always copy the uncoded blocks into it
1588      from the reference frame.
1589     We could also copy the coded blocks back over the reference frame, if we
1590      wait for an additional MCU to be decoded, which might be faster if only a
1591      small number of blocks are coded.
1592     However, this introduces more latency, creating a larger cache footprint.
1593     It's unknown which decision is better, but this one results in simpler
1594      code, and the hard case (high bitrate, high resolution) is handled
1595      correctly.*/
1596   /*Copy the uncoded blocks from the previous reference frame.*/
1597   if(_pipe->nuncoded_fragis[_pli]>0){
1598     _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
1599     oc_frag_copy_list(&_dec->state,
1600      _dec->state.ref_frame_data[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
1601      _dec->state.ref_frame_data[_dec->state.ref_frame_idx[OC_FRAME_PREV]],
1602      _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
1603      _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
1604   }
1605 }
1606
/*Filter a horizontal block edge.
  The 8 pixel columns that cross the edge are processed independently.
  For each column, 10 vertically adjacent input samples are gathered (one
   every _src_ystride bytes, starting at _src) and 8 output samples are
   written (one every _dst_ystride bytes, starting at _dst); the outputs
   correspond to input samples 1 through 8.
  The smoothed values are only used when the activity on both sides of the
   edge is below _flimit and the step between samples 4 and 5 is smaller
   than _qstep in magnitude.
  Otherwise the input samples are passed through unchanged.
  _dst:         The first output pixel of the left-most column.
  _dst_ystride: The offset between two output rows.
  _src:         The first input pixel of the left-most column.
  _src_ystride: The offset between two input rows.
  _qstep:       The exclusive upper bound on the step across the edge.
  _flimit:      The exclusive upper bound on the activity on either side.
  _variance0:   Accumulates the activity of samples 0...4 of every column,
                 saturated to 255 per column.
  _variance1:   Accumulates the activity of samples 5...9 of every column,
                 saturated to 255 per column.*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  int col;
  for(col=0;col<8;col++){
    const unsigned char *in;
    unsigned char       *out;
    int                  p[10];
    int                  q[8];
    int                  act0;
    int                  act1;
    int                  step;
    int                  i;
    /*Gather the whole column before anything is written.*/
    in=_src+col;
    for(i=0;i<10;i++){
      p[i]=*in;
      in+=_src_ystride;
    }
    /*Sum of absolute differences of neighboring samples on each side.*/
    act0=abs(p[1]-p[0])+abs(p[2]-p[1])+abs(p[3]-p[2])+abs(p[4]-p[3]);
    act1=abs(p[5]-p[6])+abs(p[6]-p[7])+abs(p[7]-p[8])+abs(p[8]-p[9]);
    *_variance0+=OC_MINI(255,act0);
    *_variance1+=OC_MINI(255,act1);
    step=p[5]-p[4];
    if(act0<_flimit&&act1<_flimit&&step<_qstep&&-step<_qstep){
      /*Both sides are flat and the step is small: smooth across the edge.
        Every output is a weighted sum with total weight 8, rounded.*/
      q[0]=(p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3;
      q[1]=(p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3;
      for(i=0;i<4;i++){
        q[i+2]=(p[i]+p[i+1]+p[i+2]+p[i+3]*2+
         p[i+4]+p[i+5]+p[i+6]+4)>>3;
      }
      q[6]=(p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3;
      q[7]=(p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3;
    }
    else{
      /*Leave the pixels as they are.*/
      for(i=0;i<8;i++)q[i]=p[i+1];
    }
    out=_dst+col;
    for(i=0;i<8;i++){
      *out=(unsigned char)q[i];
      out+=_dst_ystride;
    }
  }
}
1660
/*Filter a vertical block edge.
  This operates in place on 8 consecutive rows.
  In every row, the 10 pixels starting one to the left of the row's first
   output pixel are read, and, if the filter is applied, the 8 pixels
   starting at the first output pixel are overwritten.
  The filter is applied under the same conditions as for horizontal edges:
   the activity on both sides must be below _flimit and the step between
   samples 4 and 5 must be smaller than _qstep in magnitude.
  Rows that fail the test are left untouched.
  _dst:         The first output pixel of the top row.
  _dst_ystride: The offset between two rows.
  _qstep:       The exclusive upper bound on the step across the edge.
  _flimit:      The exclusive upper bound on the activity on either side.
  _variances:   _variances[0] accumulates the activity of samples 0...4 and
                 _variances[1] that of samples 5...9 of every row, each
                 saturated to 255 per row.*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  unsigned char *row;
  int            line;
  row=_dst;
  for(line=0;line<8;line++){
    int p[10];
    int act0;
    int act1;
    int step;
    int i;
    /*Sample 0 sits one pixel to the left of the first output pixel.*/
    for(i=0;i<10;i++)p[i]=row[i-1];
    act0=abs(p[1]-p[0])+abs(p[2]-p[1])+abs(p[3]-p[2])+abs(p[4]-p[3]);
    act1=abs(p[5]-p[6])+abs(p[6]-p[7])+abs(p[7]-p[8])+abs(p[8]-p[9]);
    _variances[0]+=OC_MINI(255,act0);
    _variances[1]+=OC_MINI(255,act1);
    step=p[5]-p[4];
    if(act0<_flimit&&act1<_flimit&&step<_qstep&&-step<_qstep){
      /*All inputs were copied out above, so writing in place is safe.*/
      row[0]=(unsigned char)((p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3);
      row[1]=(unsigned char)((p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3);
      for(i=0;i<4;i++){
        row[i+2]=(unsigned char)((p[i]+p[i+1]+p[i+2]+p[i+3]*2+
         p[i+4]+p[i+5]+p[i+6]+4)>>3);
      }
      row[6]=(unsigned char)((p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3);
      row[7]=(unsigned char)((p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3);
    }
    row+=_dst_ystride;
  }
}
1697
1698 static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
1699  th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
1700  int _fragy_end){
1701   oc_fragment_plane   *fplane;
1702   int                 *variance;
1703   unsigned char       *dc_qi;
1704   unsigned char       *dst;
1705   const unsigned char *src;
1706   ptrdiff_t            froffset;
1707   int                  dst_ystride;
1708   int                  src_ystride;
1709   int                  nhfrags;
1710   int                  width;
1711   int                  notstart;
1712   int                  notdone;
1713   int                  flimit;
1714   int                  qstep;
1715   int                  y_end;
1716   int                  y;
1717   int                  x;
1718   _dst+=_pli;
1719   _src+=_pli;
1720   fplane=_dec->state.fplanes+_pli;
1721   nhfrags=fplane->nhfrags;
1722   froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
1723   variance=_dec->variances+froffset;
1724   dc_qi=_dec->dc_qis+froffset;
1725   notstart=_fragy0>0;
1726   notdone=_fragy_end<fplane->nvfrags;
1727   /*We want to clear an extra row of variances, except at the end.*/
1728   memset(variance+(nhfrags&-notstart),0,
1729    (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
1730   /*Except for the first time, we want to point to the middle of the row.*/
1731   y=(_fragy0<<3)+(notstart<<2);
1732   dst_ystride=_dst->stride;
1733   src_ystride=_src->stride;
1734   dst=_dst->data+y*(ptrdiff_t)dst_ystride;
1735   src=_src->data+y*(ptrdiff_t)src_ystride;
1736   width=_dst->width;
1737   for(;y<4;y++){
1738     memcpy(dst,src,width*sizeof(dst[0]));
1739     dst+=dst_ystride;
1740     src+=src_ystride;
1741   }
1742   /*We also want to skip the last row in the frame for this loop.*/
1743   y_end=_fragy_end-!notdone<<3;
1744   for(;y<y_end;y+=8){
1745     qstep=_dec->pp_dc_scale[*dc_qi];
1746     flimit=(qstep*3)>>2;
1747     oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
1748      qstep,flimit,variance,variance+nhfrags);
1749     variance++;
1750     dc_qi++;
1751     for(x=8;x<width;x+=8){
1752       qstep=_dec->pp_dc_scale[*dc_qi];
1753       flimit=(qstep*3)>>2;
1754       oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
1755        qstep,flimit,variance,variance+nhfrags);
1756       oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
1757        qstep,flimit,variance-1);
1758       variance++;
1759       dc_qi++;
1760     }
1761     dst+=dst_ystride<<3;
1762     src+=src_ystride<<3;
1763   }
1764   /*And finally, handle the last row in the frame, if it's in the range.*/
1765   if(!notdone){
1766     int height;
1767     height=_dst->height;
1768     for(;y<height;y++){
1769       memcpy(dst,src,width*sizeof(dst[0]));
1770       dst+=dst_ystride;
1771       src+=src_ystride;
1772     }
1773     /*Filter the last row of vertical block edges.*/
1774     dc_qi++;
1775     for(x=8;x<width;x+=8){
1776       qstep=_dec->pp_dc_scale[*dc_qi++];
1777       flimit=(qstep*3)>>2;
1778       oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
1779        qstep,flimit,variance++);
1780     }
1781   }
1782 }
1783
/*Compute the blending weight for a pair of neighboring pixels.
  The weight shrinks as the pixels differ more; it is clamped to
   [0,_mod_hi], except that very large differences (a raw value below -64)
   select _sharp_mod instead.
  _p0:        The first pixel value.
  _p1:        The second pixel value.
  _dc_scale:  The DC scale of the block.
  _shift:     The amount to scale the absolute difference by.
  _sharp_mod: The weight to use across very sharp edges.
  _mod_hi:    The largest weight allowed otherwise.*/
static int oc_dering_mod(int _p0,int _p1,int _dc_scale,int _shift,
 int _sharp_mod,int _mod_hi){
  int mod;
  mod=32+_dc_scale-(abs(_p0-_p1)<<_shift);
  return mod<-64?_sharp_mod:OC_CLAMPI(0,mod,_mod_hi);
}

/*Apply the deringing filter to one 8x8 block, in place.
  Every pixel is replaced by a weighted average of itself and its four
   neighbors, where the weight of each neighbor depends on how much the two
   pixels on either side of their shared boundary differ.
  Because the block is filtered in place, the left and upper neighbors of a
   pixel have already been filtered by the time they are used.
  _idata:     The top-left pixel of the block.
  _ystride:   The offset between two rows.
  _b:         A mask of the block sides that must not be read across:
               1 for the left, 2 for the right, 4 for the top and 8 for the
               bottom.
              On those sides the pixel on the block boundary is used as its
               own neighbor.
  _dc_scale:  The DC scale of the block.
  _sharp_mod: The weight to use across very sharp edges.
  _strong:    Selects the strong (1) or regular (0) filter parameters.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  static const unsigned char OC_MOD_MAX[2]={24,32};
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *above;
  const unsigned char *cur;
  const unsigned char *below;
  const unsigned char *col;
  const unsigned char *pcol;
  unsigned char       *out;
  int                  vmod[72];
  int                  hmod[72];
  int                  mod_hi;
  int                  shift;
  int                  open_l;
  int                  open_r;
  int                  open_t;
  int                  open_b;
  int                  by;
  int                  bx;
  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
  shift=OC_MOD_SHIFT[_strong];
  open_l=!(_b&1);
  open_r=!(_b&2);
  open_t=!(_b&4);
  open_b=!(_b&8);
  /*Weights across the 9 horizontal boundaries: vmod[8*by+bx] belongs to the
     boundary above row by in column bx.*/
  cur=_idata;
  above=open_t?cur-_ystride:cur;
  for(by=0;by<9;by++){
    for(bx=0;bx<8;bx++){
      vmod[by*8+bx]=oc_dering_mod(cur[bx],above[bx],
       _dc_scale,shift,_sharp_mod,mod_hi);
    }
    above=cur;
    /*Do not step below the block when its bottom side is closed.*/
    if(open_b||by<7)cur+=_ystride;
  }
  /*Weights across the 9 vertical boundaries: hmod[8*bx+by] belongs to the
     boundary to the left of column bx in row by.*/
  col=_idata;
  pcol=_idata-open_l;
  for(bx=0;bx<9;bx++){
    const unsigned char *p0;
    const unsigned char *p1;
    p0=col;
    p1=pcol;
    for(by=0;by<8;by++){
      hmod[bx*8+by]=oc_dering_mod(*p0,*p1,
       _dc_scale,shift,_sharp_mod,mod_hi);
      p0+=_ystride;
      p1+=_ystride;
    }
    pcol=col;
    /*Do not step right of the block when its right side is closed.*/
    if(open_r||bx<7)col++;
  }
  /*Blend every pixel with its four neighbors.*/
  out=_idata;
  cur=_idata;
  above=open_t?cur-_ystride:cur;
  below=cur+_ystride;
  for(by=0;by<8;by++){
    for(bx=0;bx<8;bx++){
      int lx;
      int rx;
      int cw;
      int acc;
      int w;
      int v;
      /*Interior columns always have both horizontal neighbors; the outer
         columns only reach outside the block if that side is open.*/
      lx=bx-(bx>0||open_l);
      rx=bx+(bx<7||open_r);
      /*cw is the weight left for the center pixel out of 128, and acc
         starts at the rounding offset.*/
      cw=128;
      acc=64;
      w=hmod[bx*8+by];
      cw-=w;
      acc+=w*cur[lx];
      w=vmod[by*8+bx];
      cw-=w;
      acc+=w*above[bx];
      w=vmod[(by+1)*8+bx];
      cw-=w;
      acc+=w*below[bx];
      w=hmod[(bx+1)*8+by];
      cw-=w;
      acc+=w*cur[rx];
      v=(cw*cur[bx]+acc)>>7;
      /*out and cur address the same row, so this is visible to the next
         pixel as its left neighbor.*/
      out[bx]=OC_CLAMP255(v);
    }
    out+=_ystride;
    above=cur;
    cur=below;
    if(open_b||by<6)below+=_ystride;
  }
}
1884
1885 #define OC_DERING_THRESH1 (384)
1886 #define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
1887 #define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
1888 #define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
1889
1890 static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
1891  int _pli,int _fragy0,int _fragy_end){
1892   th_img_plane      *iplane;
1893   oc_fragment_plane *fplane;
1894   oc_fragment       *frag;
1895   int               *variance;
1896   unsigned char     *idata;
1897   ptrdiff_t          froffset;
1898   int                ystride;
1899   int                nhfrags;
1900   int                sthresh;
1901   int                strong;
1902   int                y_end;
1903   int                width;
1904   int                height;
1905   int                y;
1906   int                x;
1907   iplane=_img+_pli;
1908   fplane=_dec->state.fplanes+_pli;
1909   nhfrags=fplane->nhfrags;
1910   froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
1911   variance=_dec->variances+froffset;
1912   frag=_dec->state.frags+froffset;
1913   strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
1914   sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
1915   y=_fragy0<<3;
1916   ystride=iplane->stride;
1917   idata=iplane->data+y*(ptrdiff_t)ystride;
1918   y_end=_fragy_end<<3;
1919   width=iplane->width;
1920   height=iplane->height;
1921   for(;y<y_end;y+=8){
1922     for(x=0;x<width;x+=8){
1923       int b;
1924       int qi;
1925       int var;
1926       qi=_dec->state.qis[frag->qii];
1927       var=*variance;
1928       b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3;
1929       if(strong&&var>sthresh){
1930         oc_dering_block(idata+x,ystride,b,
1931          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1932         if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
1933          !(b&2)&&variance[1]>OC_DERING_THRESH4||
1934          !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
1935          !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){
1936           oc_dering_block(idata+x,ystride,b,
1937            _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1938           oc_dering_block(idata+x,ystride,b,
1939            _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1940         }
1941       }
1942       else if(var>OC_DERING_THRESH2){
1943         oc_dering_block(idata+x,ystride,b,
1944          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1945       }
1946       else if(var>OC_DERING_THRESH1){
1947         oc_dering_block(idata+x,ystride,b,
1948          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
1949       }
1950       frag++;
1951       variance++;
1952     }
1953     idata+=ystride<<3;
1954   }
1955 }
1956
1957
1958
1959 th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
1960   oc_dec_ctx *dec;
1961   if(_info==NULL||_setup==NULL)return NULL;
1962   dec=oc_aligned_malloc(sizeof(*dec),16);
1963   if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
1964     oc_aligned_free(dec);
1965     return NULL;
1966   }
1967   dec->state.curframe_num=0;
1968   return dec;
1969 }
1970
1971 void th_decode_free(th_dec_ctx *_dec){
1972   if(_dec!=NULL){
1973     oc_dec_clear(_dec);
1974     oc_aligned_free(_dec);
1975   }
1976 }
1977
1978 int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
1979  size_t _buf_sz){
1980   switch(_req){
1981   case TH_DECCTL_GET_PPLEVEL_MAX:{
1982     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
1983     if(_buf_sz!=sizeof(int))return TH_EINVAL;
1984     (*(int *)_buf)=OC_PP_LEVEL_MAX;
1985     return 0;
1986   }break;
1987   case TH_DECCTL_SET_PPLEVEL:{
1988     int pp_level;
1989     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
1990     if(_buf_sz!=sizeof(int))return TH_EINVAL;
1991     pp_level=*(int *)_buf;
1992     if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
1993     _dec->pp_level=pp_level;
1994     return 0;
1995   }break;
1996   case TH_DECCTL_SET_GRANPOS:{
1997     ogg_int64_t granpos;
1998     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
1999     if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
2000     granpos=*(ogg_int64_t *)_buf;
2001     if(granpos<0)return TH_EINVAL;
2002     _dec->state.granpos=granpos;
2003     _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
2004      -_dec->state.granpos_bias;
2005     _dec->state.curframe_num=_dec->state.keyframe_num
2006      +(granpos&(1<<_dec->state.info.keyframe_granule_shift)-1);
2007     return 0;
2008   }break;
2009   case TH_DECCTL_SET_STRIPE_CB:{
2010     th_stripe_callback *cb;
2011     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2012     if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
2013     cb=(th_stripe_callback *)_buf;
2014     _dec->stripe_cb.ctx=cb->ctx;
2015     _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
2016     return 0;
2017   }break;
2018 #ifdef HAVE_CAIRO
2019   case TH_DECCTL_SET_TELEMETRY_MBMODE:{
2020     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2021     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2022     _dec->telemetry=1;
2023     _dec->telemetry_mbmode=*(int *)_buf;
2024     return 0;
2025   }break;
2026   case TH_DECCTL_SET_TELEMETRY_MV:{
2027     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2028     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2029     _dec->telemetry=1;
2030     _dec->telemetry_mv=*(int *)_buf;
2031     return 0;
2032   }break;
2033   case TH_DECCTL_SET_TELEMETRY_QI:{
2034     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2035     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2036     _dec->telemetry=1;
2037     _dec->telemetry_qi=*(int *)_buf;
2038     return 0;
2039   }break;
2040   case TH_DECCTL_SET_TELEMETRY_BITS:{
2041     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2042     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2043     _dec->telemetry=1;
2044     _dec->telemetry_bits=*(int *)_buf;
2045     return 0;
2046   }break;
2047 #endif
2048   default:return TH_EIMPL;
2049   }
2050 }
2051
2052 /*We're decoding an INTER frame, but have no initialized reference
2053    buffers (i.e., decoding did not start on a key frame).
2054   We initialize them to a solid gray here.*/
2055 static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
2056   th_info *info;
2057   size_t   yplane_sz;
2058   size_t   cplane_sz;
2059   int      yhstride;
2060   int      yheight;
2061   int      chstride;
2062   int      cheight;
2063   _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
2064   _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
2065   _dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
2066   memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
2067    sizeof(_dec->pp_frame_buf[0])*3);
2068   info=&_dec->state.info;
2069   yhstride=info->frame_width+2*OC_UMV_PADDING;
2070   yheight=info->frame_height+2*OC_UMV_PADDING;
2071   chstride=yhstride>>!(info->pixel_fmt&1);
2072   cheight=yheight>>!(info->pixel_fmt&2);
2073   yplane_sz=yhstride*(size_t)yheight;
2074   cplane_sz=chstride*(size_t)cheight;
2075   memset(_dec->state.ref_frame_data[0],0x80,yplane_sz+2*cplane_sz);
2076 }
2077
2078 int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
2079  ogg_int64_t *_granpos){
2080   int ret;
2081   if(_dec==NULL||_op==NULL)return TH_EFAULT;
2082   /*A completely empty packet indicates a dropped frame and is treated exactly
2083      like an inter frame with no coded blocks.*/
2084   if(_op->bytes==0){
2085     _dec->state.frame_type=OC_INTER_FRAME;
2086     _dec->state.ntotal_coded_fragis=0;
2087   }
2088   else{
2089     oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
2090     ret=oc_dec_frame_header_unpack(_dec);
2091     if(ret<0)return ret;
2092     if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
2093     else oc_dec_coded_flags_unpack(_dec);
2094   }
2095   /*If there have been no reference frames, and we need one, initialize one.*/
2096   if(_dec->state.frame_type!=OC_INTRA_FRAME&&
2097    (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
2098    _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
2099     oc_dec_init_dummy_frame(_dec);
2100   }
2101   /*If this was an inter frame with no coded blocks...*/
2102   if(_dec->state.ntotal_coded_fragis<=0){
2103     /*Just update the granule position and return.*/
2104     _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
2105      _dec->state.info.keyframe_granule_shift)
2106      +(_dec->state.curframe_num-_dec->state.keyframe_num);
2107     _dec->state.curframe_num++;
2108     if(_granpos!=NULL)*_granpos=_dec->state.granpos;
2109     return TH_DUPFRAME;
2110   }
2111   else{
2112     th_ycbcr_buffer stripe_buf;
2113     int             stripe_fragy;
2114     int             refi;
2115     int             pli;
2116     int             notstart;
2117     int             notdone;
2118     /*Select a free buffer to use for the reconstructed version of this frame.*/
2119     for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
2120      refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
2121     _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
2122 #if defined(HAVE_CAIRO)
2123     _dec->telemetry_frame_bytes=_op->bytes;
2124 #endif
2125     if(_dec->state.frame_type==OC_INTRA_FRAME){
2126       _dec->state.keyframe_num=_dec->state.curframe_num;
2127 #if defined(HAVE_CAIRO)
2128       _dec->telemetry_coding_bytes=
2129        _dec->telemetry_mode_bytes=
2130        _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
2131 #endif
2132     }
2133     else{
2134 #if defined(HAVE_CAIRO)
2135       _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
2136 #endif
2137       oc_dec_mb_modes_unpack(_dec);
2138 #if defined(HAVE_CAIRO)
2139       _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
2140 #endif
2141       oc_dec_mv_unpack_and_frag_modes_fill(_dec);
2142 #if defined(HAVE_CAIRO)
2143       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
2144 #endif
2145     }
2146     oc_dec_block_qis_unpack(_dec);
2147 #if defined(HAVE_CAIRO)
2148     _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
2149 #endif
2150     oc_dec_residual_tokens_unpack(_dec);
2151     /*Update granule position.
2152       This must be done before the striped decode callbacks so that the
2153        application knows what to do with the frame data.*/
2154     _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
2155      _dec->state.info.keyframe_granule_shift)
2156      +(_dec->state.curframe_num-_dec->state.keyframe_num);
2157     _dec->state.curframe_num++;
2158     if(_granpos!=NULL)*_granpos=_dec->state.granpos;
2159     /*All of the rest of the operations -- DC prediction reversal,
2160        reconstructing coded fragments, copying uncoded fragments, loop
2161        filtering, extending borders, and out-of-loop post-processing -- should
2162        be pipelined.
2163       I.e., DC prediction reversal, reconstruction, and uncoded fragment
2164        copying are done for one or two super block rows, then loop filtering is
2165        run as far as it can, then bordering copying, then post-processing.
2166       For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
2167        block rows, and one chroma.
2168       Otherwise, an MCU consists of one super block row from each plane.
2169       Inside each MCU, we perform all of the steps on one color plane before
2170        moving on to the next.
2171       After reconstruction, the additional filtering stages introduce a delay
2172        since they need some pixels from the next fragment row.
2173       Thus the actual number of decoded rows available is slightly smaller for
2174        the first MCU, and slightly larger for the last.
2175
2176       This entire process allows us to operate on the data while it is still in
2177        cache, resulting in big performance improvements.
2178       An application callback allows further application processing (blitting
2179        to video memory, color conversion, etc.) to also use the data while it's
2180        in cache.*/
2181     oc_dec_pipeline_init(_dec,&_dec->pipe);
2182     oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
2183     notstart=0;
2184     notdone=1;
2185     for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
2186       int avail_fragy0;
2187       int avail_fragy_end;
2188       avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
2189       notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
2190       for(pli=0;pli<3;pli++){
2191         oc_fragment_plane *fplane;
2192         int                frag_shift;
2193         int                pp_offset;
2194         int                sdelay;
2195         int                edelay;
2196         fplane=_dec->state.fplanes+pli;
2197         /*Compute the first and last fragment row of the current MCU for this
2198            plane.*/
2199         frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
2200         _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
2201         _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
2202          _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
2203         oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
2204         oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
2205         sdelay=edelay=0;
2206         if(_dec->pipe.loop_filter){
2207           sdelay+=notstart;
2208           edelay+=notdone;
2209           oc_state_loop_filter_frag_rows(&_dec->state,
2210            _dec->pipe.bounding_values,refi,pli,
2211            _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2212         }
2213         /*To fill the borders, we have an additional two pixel delay, since a
2214            fragment in the next row could filter its top edge, using two pixels
2215            from a fragment in this row.
2216           But there's no reason to delay a full fragment between the two.*/
2217         oc_state_borders_fill_rows(&_dec->state,refi,pli,
2218          (_dec->pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
2219          (_dec->pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
2220         /*Out-of-loop post-processing.*/
2221         pp_offset=3*(pli!=0);
2222         if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
2223           /*Perform de-blocking in one plane.*/
2224           sdelay+=notstart;
2225           edelay+=notdone;
2226           oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
2227            _dec->state.ref_frame_bufs[refi],pli,
2228            _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2229           if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
2230             /*Perform de-ringing in one plane.*/
2231             sdelay+=notstart;
2232             edelay+=notdone;
2233             oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
2234              _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2235           }
2236         }
2237         /*If no post-processing is done, we still need to delay a row for the
2238            loop filter, thanks to the strange filtering order VP3 chose.*/
2239         else if(_dec->pipe.loop_filter){
2240           sdelay+=notstart;
2241           edelay+=notdone;
2242         }
2243         /*Compute the intersection of the available rows in all planes.
2244           If chroma is sub-sampled, the effect of each of its delays is
2245            doubled, but luma might have more post-processing filters enabled
2246            than chroma, so we don't know up front which one is the limiting
2247            factor.*/
2248         avail_fragy0=OC_MINI(avail_fragy0,
2249          _dec->pipe.fragy0[pli]-sdelay<<frag_shift);
2250         avail_fragy_end=OC_MINI(avail_fragy_end,
2251          _dec->pipe.fragy_end[pli]-edelay<<frag_shift);
2252       }
2253       if(_dec->stripe_cb.stripe_decoded!=NULL){
2254         /*The callback might want to use the FPU, so let's make sure they can.
2255           We violate all kinds of ABI restrictions by not doing this until
2256            now, but none of them actually matter since we don't use floating
2257            point ourselves.*/
2258         oc_restore_fpu(&_dec->state);
2259         /*Make the callback, ensuring we flip the sense of the "start" and
2260            "end" of the available region upside down.*/
2261         (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
2262          _dec->state.fplanes[0].nvfrags-avail_fragy_end,
2263          _dec->state.fplanes[0].nvfrags-avail_fragy0);
2264       }
2265       notstart=1;
2266     }
2267     /*Finish filling in the reference frame borders.*/
2268     for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
2269     /*Update the reference frame indices.*/
2270     if(_dec->state.frame_type==OC_INTRA_FRAME){
2271       /*The new frame becomes both the previous and gold reference frames.*/
2272       _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
2273        _dec->state.ref_frame_idx[OC_FRAME_PREV]=
2274        _dec->state.ref_frame_idx[OC_FRAME_SELF];
2275     }
2276     else{
2277       /*Otherwise, just replace the previous reference frame.*/
2278       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
2279        _dec->state.ref_frame_idx[OC_FRAME_SELF];
2280     }
2281     /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
2282        gamma values, if nothing else).*/
2283     oc_restore_fpu(&_dec->state);
2284 #if defined(OC_DUMP_IMAGES)
2285     /*We only dump images if there were some coded blocks.*/
2286     oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
2287 #endif
2288     return 0;
2289   }
2290 }
2291
2292 int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
2293   if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
2294   oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
2295 #if defined(HAVE_CAIRO)
2296   /*If telemetry ioctls are active, we need to draw to the output buffer.
2297     Stuff the plane into cairo.*/
2298   if(_dec->telemetry){
2299     cairo_surface_t *cs;
2300     unsigned char   *data;
2301     unsigned char   *y_row;
2302     unsigned char   *u_row;
2303     unsigned char   *v_row;
2304     unsigned char   *rgb_row;
2305     int              cstride;
2306     int              w;
2307     int              h;
2308     int              x;
2309     int              y;
2310     int              hdec;
2311     int              vdec;
2312     w=_ycbcr[0].width;
2313     h=_ycbcr[0].height;
2314     hdec=!(_dec->state.info.pixel_fmt&1);
2315     vdec=!(_dec->state.info.pixel_fmt&2);
2316     /*Lazy data buffer init.
2317       We could try to re-use the post-processing buffer, which would save
2318        memory, but complicate the allocation logic there.
2319       I don't think anyone cares about memory usage when using telemetry; it is
2320        not meant for embedded devices.*/
2321     if(_dec->telemetry_frame_data==NULL){
2322       _dec->telemetry_frame_data=_ogg_malloc(
2323        (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
2324       if(_dec->telemetry_frame_data==NULL)return 0;
2325     }
2326     cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
2327     /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
2328     data=cairo_image_surface_get_data(cs);
2329     if(data==NULL){
2330       cairo_surface_destroy(cs);
2331       return 0;
2332     }
2333     cstride=cairo_image_surface_get_stride(cs);
2334     y_row=_ycbcr[0].data;
2335     u_row=_ycbcr[1].data;
2336     v_row=_ycbcr[2].data;
2337     rgb_row=data;
2338     for(y=0;y<h;y++){
2339       for(x=0;x<w;x++){
2340         int r;
2341         int g;
2342         int b;
2343         r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
2344         g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
2345          -2672387*v_row[x>>hdec]+447306710)/3287200;
2346         b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
2347         rgb_row[4*x+0]=OC_CLAMP255(b);
2348         rgb_row[4*x+1]=OC_CLAMP255(g);
2349         rgb_row[4*x+2]=OC_CLAMP255(r);
2350       }
2351       y_row+=_ycbcr[0].stride;
2352       u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
2353       v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
2354       rgb_row+=cstride;
2355     }
2356     /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
2357     {
2358       cairo_t           *c;
2359       const oc_fragment *frags;
2360       oc_mv             *frag_mvs;
2361       const signed char *mb_modes;
2362       oc_mb_map         *mb_maps;
2363       size_t             nmbs;
2364       size_t             mbi;
2365       int                row2;
2366       int                col2;
2367       int                qim[3]={0,0,0};
2368       if(_dec->state.nqis==2){
2369         int bqi;
2370         bqi=_dec->state.qis[0];
2371         if(_dec->state.qis[1]>bqi)qim[1]=1;
2372         if(_dec->state.qis[1]<bqi)qim[1]=-1;
2373       }
2374       if(_dec->state.nqis==3){
2375         int bqi;
2376         int cqi;
2377         int dqi;
2378         bqi=_dec->state.qis[0];
2379         cqi=_dec->state.qis[1];
2380         dqi=_dec->state.qis[2];
2381         if(cqi>bqi&&dqi>bqi){
2382           if(dqi>cqi){
2383             qim[1]=1;
2384             qim[2]=2;
2385           }
2386           else{
2387             qim[1]=2;
2388             qim[2]=1;
2389           }
2390         }
2391         else if(cqi<bqi&&dqi<bqi){
2392           if(dqi<cqi){
2393             qim[1]=-1;
2394             qim[2]=-2;
2395           }
2396           else{
2397             qim[1]=-2;
2398             qim[2]=-1;
2399           }
2400         }
2401         else{
2402           if(cqi<bqi)qim[1]=-1;
2403           else qim[1]=1;
2404           if(dqi<bqi)qim[2]=-1;
2405           else qim[2]=1;
2406         }
2407       }
2408       c=cairo_create(cs);
2409       frags=_dec->state.frags;
2410       frag_mvs=_dec->state.frag_mvs;
2411       mb_modes=_dec->state.mb_modes;
2412       mb_maps=_dec->state.mb_maps;
2413       nmbs=_dec->state.nmbs;
2414       row2=0;
2415       col2=0;
2416       for(mbi=0;mbi<nmbs;mbi++){
2417         float x;
2418         float y;
2419         int   bi;
2420         y=h-(row2+((col2+1>>1)&1))*16-16;
2421         x=(col2>>1)*16;
2422         cairo_set_line_width(c,1.);
2423         /*Keyframe (all intra) red box.*/
2424         if(_dec->state.frame_type==OC_INTRA_FRAME){
2425           if(_dec->telemetry_mbmode&0x02){
2426             cairo_set_source_rgba(c,1.,0,0,.5);
2427             cairo_rectangle(c,x+2.5,y+2.5,11,11);
2428             cairo_stroke_preserve(c);
2429             cairo_set_source_rgba(c,1.,0,0,.25);
2430             cairo_fill(c);
2431           }
2432         }
2433         else{
2434           ptrdiff_t fragi;
2435           int       frag_mvx;
2436           int       frag_mvy;
2437           for(bi=0;bi<4;bi++){
2438             fragi=mb_maps[mbi][0][bi];
2439             if(fragi>=0&&frags[fragi].coded){
2440               frag_mvx=OC_MV_X(frag_mvs[fragi]);
2441               frag_mvy=OC_MV_Y(frag_mvs[fragi]);
2442               break;
2443             }
2444           }
2445           if(bi<4){
2446             switch(mb_modes[mbi]){
2447               case OC_MODE_INTRA:{
2448                 if(_dec->telemetry_mbmode&0x02){
2449                   cairo_set_source_rgba(c,1.,0,0,.5);
2450                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2451                   cairo_stroke_preserve(c);
2452                   cairo_set_source_rgba(c,1.,0,0,.25);
2453                   cairo_fill(c);
2454                 }
2455               }break;
2456               case OC_MODE_INTER_NOMV:{
2457                 if(_dec->telemetry_mbmode&0x01){
2458                   cairo_set_source_rgba(c,0,0,1.,.5);
2459                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2460                   cairo_stroke_preserve(c);
2461                   cairo_set_source_rgba(c,0,0,1.,.25);
2462                   cairo_fill(c);
2463                 }
2464               }break;
2465               case OC_MODE_INTER_MV:{
2466                 if(_dec->telemetry_mbmode&0x04){
2467                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2468                   cairo_set_source_rgba(c,0,1.,0,.5);
2469                   cairo_stroke(c);
2470                 }
2471                 if(_dec->telemetry_mv&0x04){
2472                   cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2473                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2474                   cairo_set_line_width(c,3.);
2475                   cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2476                   cairo_stroke_preserve(c);
2477                   cairo_set_line_width(c,2.);
2478                   cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2479                   cairo_stroke_preserve(c);
2480                   cairo_set_line_width(c,1.);
2481                   cairo_line_to(c,x+8,y+8);
2482                   cairo_stroke(c);
2483                 }
2484               }break;
2485               case OC_MODE_INTER_MV_LAST:{
2486                 if(_dec->telemetry_mbmode&0x08){
2487                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2488                   cairo_set_source_rgba(c,0,1.,0,.5);
2489                   cairo_move_to(c,x+13.5,y+2.5);
2490                   cairo_line_to(c,x+2.5,y+8);
2491                   cairo_line_to(c,x+13.5,y+13.5);
2492                   cairo_stroke(c);
2493                 }
2494                 if(_dec->telemetry_mv&0x08){
2495                   cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2496                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2497                   cairo_set_line_width(c,3.);
2498                   cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2499                   cairo_stroke_preserve(c);
2500                   cairo_set_line_width(c,2.);
2501                   cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2502                   cairo_stroke_preserve(c);
2503                   cairo_set_line_width(c,1.);
2504                   cairo_line_to(c,x+8,y+8);
2505                   cairo_stroke(c);
2506                 }
2507               }break;
2508               case OC_MODE_INTER_MV_LAST2:{
2509                 if(_dec->telemetry_mbmode&0x10){
2510                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2511                   cairo_set_source_rgba(c,0,1.,0,.5);
2512                   cairo_move_to(c,x+8,y+2.5);
2513                   cairo_line_to(c,x+2.5,y+8);
2514                   cairo_line_to(c,x+8,y+13.5);
2515                   cairo_move_to(c,x+13.5,y+2.5);
2516                   cairo_line_to(c,x+8,y+8);
2517                   cairo_line_to(c,x+13.5,y+13.5);
2518                   cairo_stroke(c);
2519                 }
2520                 if(_dec->telemetry_mv&0x10){
2521                   cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2522                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2523                   cairo_set_line_width(c,3.);
2524                   cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2525                   cairo_stroke_preserve(c);
2526                   cairo_set_line_width(c,2.);
2527                   cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2528                   cairo_stroke_preserve(c);
2529                   cairo_set_line_width(c,1.);
2530                   cairo_line_to(c,x+8,y+8);
2531                   cairo_stroke(c);
2532                 }
2533               }break;
2534               case OC_MODE_GOLDEN_NOMV:{
2535                 if(_dec->telemetry_mbmode&0x20){
2536                   cairo_set_source_rgba(c,1.,1.,0,.5);
2537                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2538                   cairo_stroke_preserve(c);
2539                   cairo_set_source_rgba(c,1.,1.,0,.25);
2540                   cairo_fill(c);
2541                 }
2542               }break;
2543               case OC_MODE_GOLDEN_MV:{
2544                 if(_dec->telemetry_mbmode&0x40){
2545                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2546                   cairo_set_source_rgba(c,1.,1.,0,.5);
2547                   cairo_stroke(c);
2548                 }
2549                 if(_dec->telemetry_mv&0x40){
2550                   cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2551                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2552                   cairo_set_line_width(c,3.);
2553                   cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2554                   cairo_stroke_preserve(c);
2555                   cairo_set_line_width(c,2.);
2556                   cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2557                   cairo_stroke_preserve(c);
2558                   cairo_set_line_width(c,1.);
2559                   cairo_line_to(c,x+8,y+8);
2560                   cairo_stroke(c);
2561                 }
2562               }break;
2563               case OC_MODE_INTER_MV_FOUR:{
2564                 if(_dec->telemetry_mbmode&0x80){
2565                   cairo_rectangle(c,x+2.5,y+2.5,4,4);
2566                   cairo_rectangle(c,x+9.5,y+2.5,4,4);
2567                   cairo_rectangle(c,x+2.5,y+9.5,4,4);
2568                   cairo_rectangle(c,x+9.5,y+9.5,4,4);
2569                   cairo_set_source_rgba(c,0,1.,0,.5);
2570                   cairo_stroke(c);
2571                 }
2572                 /*4mv is odd, coded in raster order.*/
2573                 fragi=mb_maps[mbi][0][0];
2574                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2575                   frag_mvx=OC_MV_X(frag_mvs[fragi]);
2576                   frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2577                   cairo_move_to(c,x+4+frag_mvx,y+12-frag_mvy);
2578                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2579                   cairo_set_line_width(c,3.);
2580                   cairo_line_to(c,x+4+frag_mvx*.66,y+12-frag_mvy*.66);
2581                   cairo_stroke_preserve(c);
2582                   cairo_set_line_width(c,2.);
2583                   cairo_line_to(c,x+4+frag_mvx*.33,y+12-frag_mvy*.33);
2584                   cairo_stroke_preserve(c);
2585                   cairo_set_line_width(c,1.);
2586                   cairo_line_to(c,x+4,y+12);
2587                   cairo_stroke(c);
2588                 }
2589                 fragi=mb_maps[mbi][0][1];
2590                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2591                   frag_mvx=OC_MV_X(frag_mvs[fragi]);
2592                   frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2593                   cairo_move_to(c,x+12+frag_mvx,y+12-frag_mvy);
2594                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2595                   cairo_set_line_width(c,3.);
2596                   cairo_line_to(c,x+12+frag_mvx*.66,y+12-frag_mvy*.66);
2597                   cairo_stroke_preserve(c);
2598                   cairo_set_line_width(c,2.);
2599                   cairo_line_to(c,x+12+frag_mvx*.33,y+12-frag_mvy*.33);
2600                   cairo_stroke_preserve(c);
2601                   cairo_set_line_width(c,1.);
2602                   cairo_line_to(c,x+12,y+12);
2603                   cairo_stroke(c);
2604                 }
2605                 fragi=mb_maps[mbi][0][2];
2606                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2607                   frag_mvx=OC_MV_X(frag_mvs[fragi]);
2608                   frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2609                   cairo_move_to(c,x+4+frag_mvx,y+4-frag_mvy);
2610                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2611                   cairo_set_line_width(c,3.);
2612                   cairo_line_to(c,x+4+frag_mvx*.66,y+4-frag_mvy*.66);
2613                   cairo_stroke_preserve(c);
2614                   cairo_set_line_width(c,2.);
2615                   cairo_line_to(c,x+4+frag_mvx*.33,y+4-frag_mvy*.33);
2616                   cairo_stroke_preserve(c);
2617                   cairo_set_line_width(c,1.);
2618                   cairo_line_to(c,x+4,y+4);
2619                   cairo_stroke(c);
2620                 }
2621                 fragi=mb_maps[mbi][0][3];
2622                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2623                   frag_mvx=OC_MV_X(frag_mvs[fragi]);
2624                   frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2625                   cairo_move_to(c,x+12+frag_mvx,y+4-frag_mvy);
2626                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2627                   cairo_set_line_width(c,3.);
2628                   cairo_line_to(c,x+12+frag_mvx*.66,y+4-frag_mvy*.66);
2629                   cairo_stroke_preserve(c);
2630                   cairo_set_line_width(c,2.);
2631                   cairo_line_to(c,x+12+frag_mvx*.33,y+4-frag_mvy*.33);
2632                   cairo_stroke_preserve(c);
2633                   cairo_set_line_width(c,1.);
2634                   cairo_line_to(c,x+12,y+4);
2635                   cairo_stroke(c);
2636                 }
2637               }break;
2638             }
2639           }
2640         }
2641         /*qii illustration.*/
2642         if(_dec->telemetry_qi&0x2){
2643           cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
2644           for(bi=0;bi<4;bi++){
2645             ptrdiff_t fragi;
2646             int       qiv;
2647             int       xp;
2648             int       yp;
2649             xp=x+(bi&1)*8;
2650             yp=y+8-(bi&2)*4;
2651             fragi=mb_maps[mbi][0][bi];
2652             if(fragi>=0&&frags[fragi].coded){
2653               qiv=qim[frags[fragi].qii];
2654               cairo_set_line_width(c,3.);
2655               cairo_set_source_rgba(c,0.,0.,0.,.5);
2656               switch(qiv){
2657                 /*Double plus:*/
2658                 case 2:{
2659                   if((bi&1)^((bi&2)>>1)){
2660                     cairo_move_to(c,xp+2.5,yp+1.5);
2661                     cairo_line_to(c,xp+2.5,yp+3.5);
2662                     cairo_move_to(c,xp+1.5,yp+2.5);
2663                     cairo_line_to(c,xp+3.5,yp+2.5);
2664                     cairo_move_to(c,xp+5.5,yp+4.5);
2665                     cairo_line_to(c,xp+5.5,yp+6.5);
2666                     cairo_move_to(c,xp+4.5,yp+5.5);
2667                     cairo_line_to(c,xp+6.5,yp+5.5);
2668                     cairo_stroke_preserve(c);
2669                     cairo_set_source_rgba(c,0.,1.,1.,1.);
2670                   }
2671                   else{
2672                     cairo_move_to(c,xp+5.5,yp+1.5);
2673                     cairo_line_to(c,xp+5.5,yp+3.5);
2674                     cairo_move_to(c,xp+4.5,yp+2.5);
2675                     cairo_line_to(c,xp+6.5,yp+2.5);
2676                     cairo_move_to(c,xp+2.5,yp+4.5);
2677                     cairo_line_to(c,xp+2.5,yp+6.5);
2678                     cairo_move_to(c,xp+1.5,yp+5.5);
2679                     cairo_line_to(c,xp+3.5,yp+5.5);
2680                     cairo_stroke_preserve(c);
2681                     cairo_set_source_rgba(c,0.,1.,1.,1.);
2682                   }
2683                 }break;
2684                 /*Double minus:*/
2685                 case -2:{
2686                   cairo_move_to(c,xp+2.5,yp+2.5);
2687                   cairo_line_to(c,xp+5.5,yp+2.5);
2688                   cairo_move_to(c,xp+2.5,yp+5.5);
2689                   cairo_line_to(c,xp+5.5,yp+5.5);
2690                   cairo_stroke_preserve(c);
2691                   cairo_set_source_rgba(c,1.,1.,1.,1.);
2692                 }break;
2693                 /*Plus:
                       A vertical and a horizontal stroke crossing at
                        (xp+4.5,yp+4.5).
                       cairo_stroke_preserve() draws them with the width-3
                        translucent black selected before the switch and keeps the
                        path; the color set here (.1,1.,.3) is used by the width-1
                        stroke issued after the switch.
                       NOTE(review): == binds tighter than &, so bi&2==0 and bi&1==0
                        parse as bi&(2==0) and bi&(1==0), i.e. bi&0, and the two
                        adjustments of yp and xp below never execute.
                       (bi&2)==0 and (bi&1)==0 may have been intended; confirm the
                        desired glyph placement before changing them.*/
2694                 case 1:{
2695                   if(bi&2==0)yp-=2;
2696                   if(bi&1==0)xp-=2;
2697                   cairo_move_to(c,xp+4.5,yp+2.5);
2698                   cairo_line_to(c,xp+4.5,yp+6.5);
2699                   cairo_move_to(c,xp+2.5,yp+4.5);
2700                   cairo_line_to(c,xp+6.5,yp+4.5);
2701                   cairo_stroke_preserve(c);
2702                   cairo_set_source_rgba(c,.1,1.,.3,1.);
2703                   break;
2704                 }
2705                 /*No fall through: the break above already leaves the switch.*/
2706                 /*Minus:*/
2707                 case -1:{
2708                   cairo_move_to(c,xp+2.5,yp+4.5);
2709                   cairo_line_to(c,xp+6.5,yp+4.5);
2710                   cairo_stroke_preserve(c);
2711                   cairo_set_source_rgba(c,1.,.3,.1,1.);
2712                 }break;
2713                 default:continue;
2714               }
2715               cairo_set_line_width(c,1.);
2716               cairo_stroke(c);
2717             }
2718           }
2719         }
2720         col2++;
2721         if((col2>>1)>=_dec->state.nhmbs){
2722           col2=0;
2723           row2+=2;
2724         }
2725       }
2726       /*Bit usage indicator[s]:*/
2727       if(_dec->telemetry_bits){
2728         int widths[6];
2729         int fpsn;
2730         int fpsd;
2731         int mult;
2732         int fullw;
2733         int padw;
2734         int i;
2735         fpsn=_dec->state.info.fps_numerator;
2736         fpsd=_dec->state.info.fps_denominator;
2737         mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
2738         fullw=250.f*h*fpsd*mult/fpsn;
2739         padw=w-24;
2740         /*Header and coded block bits.*/
2741         if(_dec->telemetry_frame_bytes<0||
2742          _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
2743           _dec->telemetry_frame_bytes=0;
2744         }
2745         if(_dec->telemetry_coding_bytes<0||
2746          _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
2747           _dec->telemetry_coding_bytes=0;
2748         }
2749         if(_dec->telemetry_mode_bytes<0||
2750          _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
2751           _dec->telemetry_mode_bytes=0;
2752         }
2753         if(_dec->telemetry_mv_bytes<0||
2754          _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
2755           _dec->telemetry_mv_bytes=0;
2756         }
2757         if(_dec->telemetry_qi_bytes<0||
2758          _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
2759           _dec->telemetry_qi_bytes=0;
2760         }
2761         if(_dec->telemetry_dc_bytes<0||
2762          _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
2763           _dec->telemetry_dc_bytes=0;
2764         }
2765         widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
2766         widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
2767         widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
2768         widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
2769         widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
2770         widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
2771         for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
2772         cairo_set_source_rgba(c,.0,.0,.0,.6);
2773         cairo_rectangle(c,10,h-33,widths[0]+1,5);
2774         cairo_rectangle(c,10,h-29,widths[1]+1,5);
2775         cairo_rectangle(c,10,h-25,widths[2]+1,5);
2776         cairo_rectangle(c,10,h-21,widths[3]+1,5);
2777         cairo_rectangle(c,10,h-17,widths[4]+1,5);
2778         cairo_rectangle(c,10,h-13,widths[5]+1,5);
2779         cairo_fill(c);
2780         cairo_set_source_rgb(c,1,0,0);
2781         cairo_rectangle(c,10.5,h-32.5,widths[0],4);
2782         cairo_fill(c);
2783         cairo_set_source_rgb(c,0,1,0);
2784         cairo_rectangle(c,10.5,h-28.5,widths[1],4);
2785         cairo_fill(c);
2786         cairo_set_source_rgb(c,0,0,1);
2787         cairo_rectangle(c,10.5,h-24.5,widths[2],4);
2788         cairo_fill(c);
2789         cairo_set_source_rgb(c,.6,.4,.0);
2790         cairo_rectangle(c,10.5,h-20.5,widths[3],4);
2791         cairo_fill(c);
2792         cairo_set_source_rgb(c,.3,.3,.3);
2793         cairo_rectangle(c,10.5,h-16.5,widths[4],4);
2794         cairo_fill(c);
2795         cairo_set_source_rgb(c,.5,.5,.8);
2796         cairo_rectangle(c,10.5,h-12.5,widths[5],4);
2797         cairo_fill(c);
2798       }
2799       /*Master qi indicator[s]:*/
2800       if(_dec->telemetry_qi&0x1){
2801         cairo_text_extents_t extents;
2802         char                 buffer[10];
2803         int                  p;
2804         int                  y;
2805         p=0;
2806         y=h-7.5;
2807         if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
2808         buffer[p++]=48+_dec->state.qis[0]%10;
2809         if(_dec->state.nqis>=2){
2810           buffer[p++]=' ';
2811           if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
2812           buffer[p++]=48+_dec->state.qis[1]%10;
2813         }
2814         if(_dec->state.nqis==3){
2815           buffer[p++]=' ';
2816           if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
2817           buffer[p++]=48+_dec->state.qis[2]%10;
2818         }
2819         buffer[p++]='\0';
2820         cairo_select_font_face(c,"sans",
2821          CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
2822         cairo_set_font_size(c,18);
2823         cairo_text_extents(c,buffer,&extents);
2824         cairo_set_source_rgb(c,1,1,1);
2825         cairo_move_to(c,w-extents.x_advance-10,y);
2826         cairo_show_text(c,buffer);
2827         cairo_set_source_rgb(c,0,0,0);
2828         cairo_move_to(c,w-extents.x_advance-10,y);
2829         cairo_text_path(c,buffer);
2830         cairo_set_line_width(c,.8);
2831         cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
2832         cairo_stroke(c);
2833       }
2834       cairo_destroy(c);
2835     }
2836     /*Out of the Cairo plane into the telemetry YUV buffer.*/
2837     _ycbcr[0].data=_dec->telemetry_frame_data;
2838     _ycbcr[0].stride=_ycbcr[0].width;
2839     _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
2840     _ycbcr[1].stride=_ycbcr[1].width;
2841     _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
2842     _ycbcr[2].stride=_ycbcr[2].width;
2843     y_row=_ycbcr[0].data;
2844     u_row=_ycbcr[1].data;
2845     v_row=_ycbcr[2].data;
2846     rgb_row=data;
2847     /*This is one of the few places it's worth handling chroma on a
2848        case-by-case basis.*/
2849     switch(_dec->state.info.pixel_fmt){
2850       case TH_PF_420:{
2851         for(y=0;y<h;y+=2){
2852           unsigned char *y_row2;
2853           unsigned char *rgb_row2;
2854           y_row2=y_row+_ycbcr[0].stride;
2855           rgb_row2=rgb_row+cstride;
2856           for(x=0;x<w;x+=2){
2857             int y;
2858             int u;
2859             int v;
2860             y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2861              +24966*rgb_row[4*x+0]+4207500)/255000;
2862             y_row[x]=OC_CLAMP255(y);
2863             y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2864              +24966*rgb_row[4*x+4]+4207500)/255000;
2865             y_row[x+1]=OC_CLAMP255(y);
2866             y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
2867              +24966*rgb_row2[4*x+0]+4207500)/255000;
2868             y_row2[x]=OC_CLAMP255(y);
2869             y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
2870              +24966*rgb_row2[4*x+4]+4207500)/255000;
2871             y_row2[x+1]=OC_CLAMP255(y);
2872             u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
2873              +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2874              -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
2875              +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2876              +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
2877              +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
2878             v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
2879              +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2880              -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
2881               +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2882              -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
2883               +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
2884             u_row[x>>1]=OC_CLAMP255(u);
2885             v_row[x>>1]=OC_CLAMP255(v);
2886           }
2887           y_row+=_ycbcr[0].stride<<1;
2888           u_row+=_ycbcr[1].stride;
2889           v_row+=_ycbcr[2].stride;
2890           rgb_row+=cstride<<1;
2891         }
2892       }break;
2893       case TH_PF_422:{
2894         for(y=0;y<h;y++){
2895           for(x=0;x<w;x+=2){
2896             int y;
2897             int u;
2898             int v;
2899             y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2900              +24966*rgb_row[4*x+0]+4207500)/255000;
2901             y_row[x]=OC_CLAMP255(y);
2902             y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2903              +24966*rgb_row[4*x+4]+4207500)/255000;
2904             y_row[x+1]=OC_CLAMP255(y);
2905             u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
2906              -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
2907              +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
2908             v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
2909              -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
2910              -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
2911             u_row[x>>1]=OC_CLAMP255(u);
2912             v_row[x>>1]=OC_CLAMP255(v);
2913           }
2914           y_row+=_ycbcr[0].stride;
2915           u_row+=_ycbcr[1].stride;
2916           v_row+=_ycbcr[2].stride;
2917           rgb_row+=cstride;
2918         }
2919       }break;
2920       /*case TH_PF_444:*/
2921       default:{
2922         for(y=0;y<h;y++){
2923           for(x=0;x<w;x++){
2924             int y;
2925             int u;
2926             int v;
2927             y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2928              +24966*rgb_row[4*x+0]+4207500)/255000;
2929             u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
2930              +99232*rgb_row[4*x+0]+29032005)/225930;
2931             v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
2932              -25536*rgb_row[4*x+0]+45940035)/357510;
2933             y_row[x]=OC_CLAMP255(y);
2934             u_row[x]=OC_CLAMP255(u);
2935             v_row[x]=OC_CLAMP255(v);
2936           }
2937           y_row+=_ycbcr[0].stride;
2938           u_row+=_ycbcr[1].stride;
2939           v_row+=_ycbcr[2].stride;
2940           rgb_row+=cstride;
2941         }
2942       }break;
2943     }
2944     /*Finished.
2945       Destroy the surface.*/
2946     cairo_surface_destroy(cs);
2947   }
2948 #endif
2949   return 0;
2950 }