93b806c3cf22b4828327843992436ca677b1798e
[theora.git] / lib / decode.c
1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10  *                                                                  *
11  ********************************************************************
12
13   function:
14     last mod: $Id$
15
16  ********************************************************************/
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ogg/ogg.h>
21 #include "decint.h"
22 #if defined(OC_DUMP_IMAGES)
23 # include <stdio.h>
24 # include "png.h"
25 #endif
26 #if defined(HAVE_CAIRO)
27 # include <cairo.h>
28 #endif
29
30
/*Post-processing levels, numbered consecutively from 0.*/
/*Post-processing is turned off.*/
#define OC_PP_LEVEL_DISABLED  (0)
/*Only the DC qi of each block is tracked.*/
#define OC_PP_LEVEL_TRACKDCQI (1)
/*The luma plane is deblocked.*/
#define OC_PP_LEVEL_DEBLOCKY  (2)
/*The luma plane is deringed.*/
#define OC_PP_LEVEL_DERINGY   (3)
/*The luma plane is deringed more strongly.*/
#define OC_PP_LEVEL_SDERINGY  (4)
/*The chroma planes are deblocked.*/
#define OC_PP_LEVEL_DEBLOCKC  (5)
/*The chroma planes are deringed.*/
#define OC_PP_LEVEL_DERINGC   (6)
/*The chroma planes are deringed more strongly.*/
#define OC_PP_LEVEL_SDERINGC  (7)
/*The largest valid post-processing level.*/
#define OC_PP_LEVEL_MAX       (OC_PP_LEVEL_SDERINGC)
49
50
51
52 /*The mode alphabets for the various mode coding schemes.
53   Scheme 0 uses a custom alphabet, which is not stored in this table.*/
54 static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
55   /*Last MV dominates */
56   {
57     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
58     OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
59     OC_MODE_INTER_MV_FOUR
60   },
61   {
62     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
63     OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
64     OC_MODE_INTER_MV_FOUR
65   },
66   {
67     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
68     OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
69     OC_MODE_INTER_MV_FOUR
70   },
71   {
72     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
73     OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
74     OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
75   },
76   /*No MV dominates.*/
77   {
78     OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
79     OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
80     OC_MODE_INTER_MV_FOUR
81   },
82   {
83     OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
84     OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
85     OC_MODE_INTER_MV_FOUR
86   },
87   /*Default ordering.*/
88   {
89     OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
90     OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
91     OC_MODE_INTER_MV_FOUR
92   }
93 };
94
95
96 /*The original DCT tokens are extended and reordered during the construction of
97    the Huffman tables.
98   The extension means more bits can be read with fewer calls to the bitpacker
99    during the Huffman decoding process (at the cost of larger Huffman tables),
100    and fewer tokens require additional extra bits (reducing the average storage
101    per decoded token).
102   The revised ordering reveals essential information in the token value
103    itself; specifically, whether or not there are additional extra bits to read
104    and the parameter to which those extra bits are applied.
105   The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
106   The extra bits are added into code word at the bit position inferred from the
107    token value, giving the final code word from which all required parameters
108    are derived.
109   The number of EOBs and the leading zero run length can be extracted directly.
110   The coefficient magnitude is optionally negated before extraction, according
111    to a 'flip' bit.*/
112
/*The number of additional extra bits that are decoded with each of the
   internal DCT tokens.
  Only the first 15 internal tokens have an entry; every later token needs no
   additional extra bits.*/
static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
};

/*Whether or not an internal token needs any additional extra bits.
  This is true exactly when the token indexes into
   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
  The argument is parenthesized so that expressions with lower precedence than
   '<' (e.g., a conditional or a bitwise AND) are compared as a whole.*/
#define OC_DCT_TOKEN_NEEDS_MORE(_token) \
 ((_token)<(int)(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))

/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/
#define OC_DCT_TOKEN_FAT_EOB (0)

/*The number of EOBs to use for an end-of-frame token.
  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
   is not yet available everywhere; this should be equivalent.*/
#define OC_DCT_EOB_FINISH (~(size_t)0>>1)
131
/*The location of the (6) run length bits in the code word.
  These are placed at index 0 and given 8 bits (even though 6 would suffice)
   because it may be faster to extract the lower byte on some platforms.*/
#define OC_DCT_CW_RLEN_SHIFT (0)
/*The location of the (12) EOB bits in the code word.*/
#define OC_DCT_CW_EOB_SHIFT  (8)
/*The location of the (1) flip bit in the code word.
  This must be right under the magnitude bits.*/
#define OC_DCT_CW_FLIP_BIT   (20)
/*The location of the (11) token magnitude bits in the code word.
  These must be last, and rely on a sign-extending right shift.*/
#define OC_DCT_CW_MAG_SHIFT  (21)

/*Pack the given fields into a code word.
  The fields are scaled by multiplying with a power of two instead of being
   left-shifted: (_mag)-(_flip) is negative for many table entries, and
   left-shifting a negative value is undefined behavior in C.
  The multiplication produces the same bit pattern for every value that the
   shift handled.*/
#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
 ((_eobs)*(1<<OC_DCT_CW_EOB_SHIFT)| \
 (_rlen)*(1<<OC_DCT_CW_RLEN_SHIFT)| \
 (_flip)*(1<<OC_DCT_CW_FLIP_BIT)| \
 ((_mag)-(_flip))*(1<<OC_DCT_CW_MAG_SHIFT))

/*A special code word value that signals the end of the frame (a long EOB run
   of zero).*/
#define OC_DCT_CW_FINISH (0)

/*The position at which to insert the extra bits in the code word.
  Tokens 0 and 1 yield OC_DCT_CW_EOB_SHIFT, tokens 2 through 11 yield
   OC_DCT_CW_MAG_SHIFT, and all other tokens yield 0 (OC_DCT_CW_RLEN_SHIFT).
  We use this formulation because Intel has no useful cmov.
  A real architecture would probably do better with two of those.
  This translates to 11 instructions(!), and is _still_ faster than either a
   table lookup (just barely) or the naive double-ternary implementation (which
   gcc translates to a jump and a cmov).
  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
   you want to make one of the other shifts zero.*/
#define OC_DCT_TOKEN_EB_POS(_token) \
 (((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT)&-((_token)<2)) \
 +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
167
168 /*The code words for each internal token.
169   See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
170    order.*/
171 static const ogg_int32_t OC_DCT_CODE_WORD[92]={
172   /*These tokens require additional extra bits for the EOB count.*/
173   /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
174   OC_DCT_CW_FINISH,
175   /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
176   OC_DCT_CW_PACK(16, 0,  0,0),
177   /*These tokens require additional extra bits for the magnitude.*/
178   /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
179   OC_DCT_CW_PACK( 0, 0, 13,0),
180   OC_DCT_CW_PACK( 0, 0, 13,1),
181   /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
182   OC_DCT_CW_PACK( 0, 0, 21,0),
183   OC_DCT_CW_PACK( 0, 0, 21,1),
184   /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
185   OC_DCT_CW_PACK( 0, 0, 37,0),
186   OC_DCT_CW_PACK( 0, 0, 37,1),
187   /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
188   OC_DCT_CW_PACK( 0, 0, 69,0),
189   OC_DCT_CW_PACK( 0, 0,325,0),
190   OC_DCT_CW_PACK( 0, 0, 69,1),
191   OC_DCT_CW_PACK( 0, 0,325,1),
192   /*These tokens require additional extra bits for the run length.*/
193   /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
194   OC_DCT_CW_PACK( 0,10, +1,0),
195   OC_DCT_CW_PACK( 0,10, -1,0),
196   /*OC_DCT_ZRL_TOKEN (6 extra bits)
197     Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
198   OC_DCT_CW_PACK( 0, 0,  0,1),
199   /*The remaining tokens require no additional extra bits.*/
200   /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
201   OC_DCT_CW_PACK( 1, 0,  0,0),
202   /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
203   OC_DCT_CW_PACK( 2, 0,  0,0),
204   /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
205   OC_DCT_CW_PACK( 3, 0,  0,0),
206   /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
207   OC_DCT_CW_PACK( 0, 1, +1,0),
208   OC_DCT_CW_PACK( 0, 1, -1,0),
209   OC_DCT_CW_PACK( 0, 2, +1,0),
210   OC_DCT_CW_PACK( 0, 2, -1,0),
211   OC_DCT_CW_PACK( 0, 3, +1,0),
212   OC_DCT_CW_PACK( 0, 3, -1,0),
213   OC_DCT_CW_PACK( 0, 4, +1,0),
214   OC_DCT_CW_PACK( 0, 4, -1,0),
215   OC_DCT_CW_PACK( 0, 5, +1,0),
216   OC_DCT_CW_PACK( 0, 5, -1,0),
217   /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
218   OC_DCT_CW_PACK( 0, 1, +2,0),
219   OC_DCT_CW_PACK( 0, 1, +3,0),
220   OC_DCT_CW_PACK( 0, 1, -2,0),
221   OC_DCT_CW_PACK( 0, 1, -3,0),
222   /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
223   OC_DCT_CW_PACK( 0, 6, +1,0),
224   OC_DCT_CW_PACK( 0, 7, +1,0),
225   OC_DCT_CW_PACK( 0, 8, +1,0),
226   OC_DCT_CW_PACK( 0, 9, +1,0),
227   OC_DCT_CW_PACK( 0, 6, -1,0),
228   OC_DCT_CW_PACK( 0, 7, -1,0),
229   OC_DCT_CW_PACK( 0, 8, -1,0),
230   OC_DCT_CW_PACK( 0, 9, -1,0),
231   /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
232   OC_DCT_CW_PACK( 0, 2, +2,0),
233   OC_DCT_CW_PACK( 0, 3, +2,0),
234   OC_DCT_CW_PACK( 0, 2, +3,0),
235   OC_DCT_CW_PACK( 0, 3, +3,0),
236   OC_DCT_CW_PACK( 0, 2, -2,0),
237   OC_DCT_CW_PACK( 0, 3, -2,0),
238   OC_DCT_CW_PACK( 0, 2, -3,0),
239   OC_DCT_CW_PACK( 0, 3, -3,0),
240   /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
241     Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
242   OC_DCT_CW_PACK( 0, 0,  0,1),
243   OC_DCT_CW_PACK( 0, 1,  0,0),
244   OC_DCT_CW_PACK( 0, 2,  0,0),
245   OC_DCT_CW_PACK( 0, 3,  0,0),
246   OC_DCT_CW_PACK( 0, 4,  0,0),
247   OC_DCT_CW_PACK( 0, 5,  0,0),
248   OC_DCT_CW_PACK( 0, 6,  0,0),
249   OC_DCT_CW_PACK( 0, 7,  0,0),
250   /*OC_ONE_TOKEN (0 extra bits)*/
251   OC_DCT_CW_PACK( 0, 0, +1,0),
252   /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
253   OC_DCT_CW_PACK( 0, 0, -1,0),
254   /*OC_TWO_TOKEN (0 extra bits)*/
255   OC_DCT_CW_PACK( 0, 0, +2,0),
256   /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
257   OC_DCT_CW_PACK( 0, 0, -2,0),
258   /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
259   OC_DCT_CW_PACK( 0, 0, +3,0),
260   OC_DCT_CW_PACK( 0, 0, -3,0),
261   OC_DCT_CW_PACK( 0, 0, +4,0),
262   OC_DCT_CW_PACK( 0, 0, -4,0),
263   OC_DCT_CW_PACK( 0, 0, +5,0),
264   OC_DCT_CW_PACK( 0, 0, -5,0),
265   OC_DCT_CW_PACK( 0, 0, +6,0),
266   OC_DCT_CW_PACK( 0, 0, -6,0),
267   /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
268   OC_DCT_CW_PACK( 0, 0, +7,0),
269   OC_DCT_CW_PACK( 0, 0, +8,0),
270   OC_DCT_CW_PACK( 0, 0, -7,0),
271   OC_DCT_CW_PACK( 0, 0, -8,0),
272   /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
273   OC_DCT_CW_PACK( 0, 0, +9,0),
274   OC_DCT_CW_PACK( 0, 0,+10,0),
275   OC_DCT_CW_PACK( 0, 0,+11,0),
276   OC_DCT_CW_PACK( 0, 0,+12,0),
277   OC_DCT_CW_PACK( 0, 0, -9,0),
278   OC_DCT_CW_PACK( 0, 0,-10,0),
279   OC_DCT_CW_PACK( 0, 0,-11,0),
280   OC_DCT_CW_PACK( 0, 0,-12,0),
281   /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
282   OC_DCT_CW_PACK( 8, 0,  0,0),
283   OC_DCT_CW_PACK( 9, 0,  0,0),
284   OC_DCT_CW_PACK(10, 0,  0,0),
285   OC_DCT_CW_PACK(11, 0,  0,0),
286   OC_DCT_CW_PACK(12, 0,  0,0),
287   OC_DCT_CW_PACK(13, 0,  0,0),
288   OC_DCT_CW_PACK(14, 0,  0,0),
289   OC_DCT_CW_PACK(15, 0,  0,0),
290   /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
291   OC_DCT_CW_PACK( 4, 0,  0,0),
292   OC_DCT_CW_PACK( 5, 0,  0,0),
293   OC_DCT_CW_PACK( 6, 0,  0,0),
294   OC_DCT_CW_PACK( 7, 0,  0,0),
295 };
296
297
298
299 static int oc_sb_run_unpack(oc_pack_buf *_opb){
300   /*Coding scheme:
301        Codeword            Run Length
302      0                       1
303      10x                     2-3
304      110x                    4-5
305      1110xx                  6-9
306      11110xxx                10-17
307      111110xxxx              18-33
308      111111xxxxxxxxxxxx      34-4129*/
309   static const ogg_int16_t OC_SB_RUN_TREE[22]={
310     4,
311      -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
312      -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
313      -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
314      -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
315       2,
316        -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),-(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
317   };
318   int ret;
319   ret=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
320   if(ret>=0x10){
321     int offs;
322     offs=ret&0x1F;
323     ret=6+offs+(int)oc_pack_read(_opb,ret-offs>>4);
324   }
325   return ret;
326 }
327
328 static int oc_block_run_unpack(oc_pack_buf *_opb){
329   /*Coding scheme:
330      Codeword             Run Length
331      0x                      1-2
332      10x                     3-4
333      110x                    5-6
334      1110xx                  7-10
335      11110xx                 11-14
336      11111xxxx               15-30*/
337   static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
338     5,
339      -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
340      -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
341      -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
342      -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
343      -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
344      -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
345      -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
346      33,       36,       39,       44,
347       1,-(1<<8|7),-(1<<8|8),
348       1,-(1<<8|9),-(1<<8|10),
349       2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
350       4,
351        -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
352        -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
353        -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
354        -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
355   };
356   return oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
357 }
358
359
360
361 void oc_dec_accel_init_c(oc_dec_ctx *_dec){
362 # if defined(OC_DEC_USE_VTABLE)
363   _dec->opt_vtable.dc_unpredict_mcu_plane=
364    oc_dec_dc_unpredict_mcu_plane_c;
365 # endif
366 }
367
368 static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
369  const th_setup_info *_setup){
370   int qti;
371   int pli;
372   int qi;
373   int ret;
374   ret=oc_state_init(&_dec->state,_info,3);
375   if(ret<0)return ret;
376   ret=oc_huff_trees_copy(_dec->huff_tables,
377    (const ogg_int16_t *const *)_setup->huff_tables);
378   if(ret<0){
379     oc_state_clear(&_dec->state);
380     return ret;
381   }
382   /*For each fragment, allocate one byte for every DCT coefficient token, plus
383      one byte for extra-bits for each token, plus one more byte for the long
384      EOB run, just in case it's the very last token and has a run length of
385      one.*/
386   _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
387    _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
388   if(_dec->dct_tokens==NULL){
389     oc_huff_trees_clear(_dec->huff_tables);
390     oc_state_clear(&_dec->state);
391     return TH_EFAULT;
392   }
393   for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
394     _dec->state.dequant_tables[qi][pli][qti]=
395      _dec->state.dequant_table_data[qi][pli][qti];
396   }
397   oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
398    &_setup->qinfo);
399   for(qi=0;qi<64;qi++){
400     int qsum;
401     qsum=0;
402     for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
403       qsum+=_dec->state.dequant_tables[qi][pli][qti][12]+
404        _dec->state.dequant_tables[qi][pli][qti][17]+
405        _dec->state.dequant_tables[qi][pli][qti][18]+
406        _dec->state.dequant_tables[qi][pli][qti][24]<<(pli==0);
407     }
408     _dec->pp_sharp_mod[qi]=-(qsum>>11);
409   }
410   memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
411    sizeof(_dec->state.loop_filter_limits));
412   oc_dec_accel_init(_dec);
413   _dec->pp_level=OC_PP_LEVEL_DISABLED;
414   _dec->dc_qis=NULL;
415   _dec->variances=NULL;
416   _dec->pp_frame_data=NULL;
417   _dec->stripe_cb.ctx=NULL;
418   _dec->stripe_cb.stripe_decoded=NULL;
419 #if defined(HAVE_CAIRO)
420   _dec->telemetry=0;
421   _dec->telemetry_bits=0;
422   _dec->telemetry_qi=0;
423   _dec->telemetry_mbmode=0;
424   _dec->telemetry_mv=0;
425   _dec->telemetry_frame_data=NULL;
426 #endif
427   return 0;
428 }
429
430 static void oc_dec_clear(oc_dec_ctx *_dec){
431 #if defined(HAVE_CAIRO)
432   _ogg_free(_dec->telemetry_frame_data);
433 #endif
434   _ogg_free(_dec->pp_frame_data);
435   _ogg_free(_dec->variances);
436   _ogg_free(_dec->dc_qis);
437   _ogg_free(_dec->dct_tokens);
438   oc_huff_trees_clear(_dec->huff_tables);
439   oc_state_clear(&_dec->state);
440 }
441
442
443 static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
444   long val;
445   /*Check to make sure this is a data packet.*/
446   val=oc_pack_read1(&_dec->opb);
447   if(val!=0)return TH_EBADPACKET;
448   /*Read in the frame type (I or P).*/
449   val=oc_pack_read1(&_dec->opb);
450   _dec->state.frame_type=(int)val;
451   /*Read in the qi list.*/
452   val=oc_pack_read(&_dec->opb,6);
453   _dec->state.qis[0]=(unsigned char)val;
454   val=oc_pack_read1(&_dec->opb);
455   if(!val)_dec->state.nqis=1;
456   else{
457     val=oc_pack_read(&_dec->opb,6);
458     _dec->state.qis[1]=(unsigned char)val;
459     val=oc_pack_read1(&_dec->opb);
460     if(!val)_dec->state.nqis=2;
461     else{
462       val=oc_pack_read(&_dec->opb,6);
463       _dec->state.qis[2]=(unsigned char)val;
464       _dec->state.nqis=3;
465     }
466   }
467   if(_dec->state.frame_type==OC_INTRA_FRAME){
468     /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
469       Most of the other unused bits in the VP3 headers were eliminated.
470       I don't know why these remain.*/
471     /*I wanted to eliminate wasted bits, but not all config wiggle room
472        --Monty.*/
473     val=oc_pack_read(&_dec->opb,3);
474     if(val!=0)return TH_EIMPL;
475   }
476   return 0;
477 }
478
479 /*Mark all fragments as coded and in OC_MODE_INTRA.
480   This also builds up the coded fragment list (in coded order), and clears the
481    uncoded fragment list.
482   It does not update the coded macro block list nor the super block flags, as
483    those are not used when decoding INTRA frames.*/
484 static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
485   const oc_sb_map   *sb_maps;
486   const oc_sb_flags *sb_flags;
487   oc_fragment       *frags;
488   ptrdiff_t         *coded_fragis;
489   ptrdiff_t          ncoded_fragis;
490   ptrdiff_t          prev_ncoded_fragis;
491   unsigned           nsbs;
492   unsigned           sbi;
493   int                pli;
494   coded_fragis=_dec->state.coded_fragis;
495   prev_ncoded_fragis=ncoded_fragis=0;
496   sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
497   sb_flags=_dec->state.sb_flags;
498   frags=_dec->state.frags;
499   sbi=nsbs=0;
500   for(pli=0;pli<3;pli++){
501     nsbs+=_dec->state.fplanes[pli].nsbs;
502     for(;sbi<nsbs;sbi++){
503       int quadi;
504       for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
505         int bi;
506         for(bi=0;bi<4;bi++){
507           ptrdiff_t fragi;
508           fragi=sb_maps[sbi][quadi][bi];
509           if(fragi>=0){
510             frags[fragi].coded=1;
511             frags[fragi].refi=OC_FRAME_SELF;
512             frags[fragi].mb_mode=OC_MODE_INTRA;
513             coded_fragis[ncoded_fragis++]=fragi;
514           }
515         }
516       }
517     }
518     _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
519     prev_ncoded_fragis=ncoded_fragis;
520   }
521   _dec->state.ntotal_coded_fragis=ncoded_fragis;
522 }
523
524 /*Decodes the bit flags indicating whether each super block is partially coded
525    or not.
526   Return: The number of partially coded super blocks.*/
527 static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
528   oc_sb_flags *sb_flags;
529   unsigned     nsbs;
530   unsigned     sbi;
531   unsigned     npartial;
532   unsigned     run_count;
533   long         val;
534   int          flag;
535   val=oc_pack_read1(&_dec->opb);
536   flag=(int)val;
537   sb_flags=_dec->state.sb_flags;
538   nsbs=_dec->state.nsbs;
539   sbi=npartial=0;
540   while(sbi<nsbs){
541     int full_run;
542     run_count=oc_sb_run_unpack(&_dec->opb);
543     full_run=run_count>=4129;
544     do{
545       sb_flags[sbi].coded_partially=flag;
546       sb_flags[sbi].coded_fully=0;
547       npartial+=flag;
548       sbi++;
549     }
550     while(--run_count>0&&sbi<nsbs);
551     if(full_run&&sbi<nsbs){
552       val=oc_pack_read1(&_dec->opb);
553       flag=(int)val;
554     }
555     else flag=!flag;
556   }
557   /*TODO: run_count should be 0 here.
558     If it's not, we should issue a warning of some kind.*/
559   return npartial;
560 }
561
562 /*Decodes the bit flags for whether or not each non-partially-coded super
563    block is fully coded or not.
564   This function should only be called if there is at least one
565    non-partially-coded super block.
566   Return: The number of partially coded super blocks.*/
567 static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
568   oc_sb_flags *sb_flags;
569   unsigned     nsbs;
570   unsigned     sbi;
571   unsigned     run_count;
572   long         val;
573   int          flag;
574   sb_flags=_dec->state.sb_flags;
575   nsbs=_dec->state.nsbs;
576   /*Skip partially coded super blocks.*/
577   for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
578   val=oc_pack_read1(&_dec->opb);
579   flag=(int)val;
580   do{
581     int full_run;
582     run_count=oc_sb_run_unpack(&_dec->opb);
583     full_run=run_count>=4129;
584     for(;sbi<nsbs;sbi++){
585       if(sb_flags[sbi].coded_partially)continue;
586       if(run_count--<=0)break;
587       sb_flags[sbi].coded_fully=flag;
588     }
589     if(full_run&&sbi<nsbs){
590       val=oc_pack_read1(&_dec->opb);
591       flag=(int)val;
592     }
593     else flag=!flag;
594   }
595   while(sbi<nsbs);
596   /*TODO: run_count should be 0 here.
597     If it's not, we should issue a warning of some kind.*/
598 }
599
600 static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
601   const oc_sb_map   *sb_maps;
602   const oc_sb_flags *sb_flags;
603   signed char       *mb_modes;
604   oc_fragment       *frags;
605   unsigned           nsbs;
606   unsigned           sbi;
607   unsigned           npartial;
608   long               val;
609   int                pli;
610   int                flag;
611   int                run_count;
612   ptrdiff_t         *coded_fragis;
613   ptrdiff_t         *uncoded_fragis;
614   ptrdiff_t          ncoded_fragis;
615   ptrdiff_t          nuncoded_fragis;
616   ptrdiff_t          prev_ncoded_fragis;
617   npartial=oc_dec_partial_sb_flags_unpack(_dec);
618   if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
619   if(npartial>0){
620     val=oc_pack_read1(&_dec->opb);
621     flag=!(int)val;
622   }
623   else flag=0;
624   sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
625   sb_flags=_dec->state.sb_flags;
626   mb_modes=_dec->state.mb_modes;
627   frags=_dec->state.frags;
628   sbi=nsbs=run_count=0;
629   coded_fragis=_dec->state.coded_fragis;
630   uncoded_fragis=coded_fragis+_dec->state.nfrags;
631   prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0;
632   for(pli=0;pli<3;pli++){
633     nsbs+=_dec->state.fplanes[pli].nsbs;
634     for(;sbi<nsbs;sbi++){
635       int quadi;
636       for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
637         int quad_coded;
638         int bi;
639         quad_coded=0;
640         for(bi=0;bi<4;bi++){
641           ptrdiff_t fragi;
642           fragi=sb_maps[sbi][quadi][bi];
643           if(fragi>=0){
644             int coded;
645             if(sb_flags[sbi].coded_fully)coded=1;
646             else if(!sb_flags[sbi].coded_partially)coded=0;
647             else{
648               if(run_count<=0){
649                 run_count=oc_block_run_unpack(&_dec->opb);
650                 flag=!flag;
651               }
652               run_count--;
653               coded=flag;
654             }
655             if(coded)coded_fragis[ncoded_fragis++]=fragi;
656             else *(uncoded_fragis-++nuncoded_fragis)=fragi;
657             quad_coded|=coded;
658             frags[fragi].coded=coded;
659             frags[fragi].refi=OC_FRAME_NONE;
660           }
661         }
662         /*Remember if there's a coded luma block in this macro block.*/
663         if(!pli)mb_modes[sbi<<2|quadi]=quad_coded;
664       }
665     }
666     _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
667     prev_ncoded_fragis=ncoded_fragis;
668   }
669   _dec->state.ntotal_coded_fragis=ncoded_fragis;
670   /*TODO: run_count should be 0 here.
671     If it's not, we should issue a warning of some kind.*/
672 }
673
674
675 /*Coding scheme:
676    Codeword            Mode Index
677    0                       0
678    10                      1
679    110                     2
680    1110                    3
681    11110                   4
682    111110                  5
683    1111110                 6
684    1111111                 7*/
685 static const ogg_int16_t OC_VLC_MODE_TREE[26]={
686   4,
687    -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
688    -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
689    -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
690    -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
691     3,
692      -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
693      -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
694 };
695
696 static const ogg_int16_t OC_CLC_MODE_TREE[9]={
697   3,
698    -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
699    -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
700 };
701
702 /*Unpacks the list of macro block modes for INTER frames.*/
703 static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
704   signed char         *mb_modes;
705   const unsigned char *alphabet;
706   unsigned char        scheme0_alphabet[8];
707   const ogg_int16_t   *mode_tree;
708   size_t               nmbs;
709   size_t               mbi;
710   long                 val;
711   int                  mode_scheme;
712   val=oc_pack_read(&_dec->opb,3);
713   mode_scheme=(int)val;
714   if(mode_scheme==0){
715     int mi;
716     /*Just in case, initialize the modes to something.
717       If the bitstream doesn't contain each index exactly once, it's likely
718        corrupt and the rest of the packet is garbage anyway, but this way we
719        won't crash, and we'll decode SOMETHING.*/
720     /*LOOP VECTORIZES*/
721     for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
722     for(mi=0;mi<OC_NMODES;mi++){
723       val=oc_pack_read(&_dec->opb,3);
724       scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
725     }
726     alphabet=scheme0_alphabet;
727   }
728   else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
729   mode_tree=mode_scheme==7?OC_CLC_MODE_TREE:OC_VLC_MODE_TREE;
730   mb_modes=_dec->state.mb_modes;
731   nmbs=_dec->state.nmbs;
732   for(mbi=0;mbi<nmbs;mbi++){
733     if(mb_modes[mbi]>0){
734       /*We have a coded luma block; decode a mode.*/
735       mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
736     }
737     /*For other valid macro blocks, INTER_NOMV is forced, but we rely on the
738        fact that OC_MODE_INTER_NOMV is already 0.*/
739   }
740 }
741
742
743
744 static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
745   5,
746    -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
747    -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
748    -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
749    -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
750    -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
751    33,          36,          39,          42,
752    45,          50,          55,          60,
753    65,          74,          83,          92,
754     1,-(1<<8|32+4),-(1<<8|32-4),
755     1,-(1<<8|32+5),-(1<<8|32-5),
756     1,-(1<<8|32+6),-(1<<8|32-6),
757     1,-(1<<8|32+7),-(1<<8|32-7),
758     2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
759     2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
760     2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
761     2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
762     3,
763      -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
764      -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
765     3,
766      -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
767      -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
768     3,
769      -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
770      -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
771     3,
772      -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
773      -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
774 };
775
776 static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
777   6,
778    -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
779    -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
780    -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
781    -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
782    -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
783    -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
784    -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
785    -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
786    -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
787    -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
788    -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
789    -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
790    -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
791    -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
792    -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
793    -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
794 };
795
796
797 static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
798   int dx;
799   int dy;
800   dx=oc_huff_token_decode(_opb,_tree)-32;
801   dy=oc_huff_token_decode(_opb,_tree)-32;
802   return OC_MV(dx,dy);
803 }
804
/*Unpacks the list of motion vectors for INTER frames, and propagates the macro
   block modes and motion vectors to the individual fragments.*/
807 static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
808   const oc_mb_map        *mb_maps;
809   const signed char      *mb_modes;
810   oc_set_chroma_mvs_func  set_chroma_mvs;
811   const ogg_int16_t      *mv_comp_tree;
812   oc_fragment            *frags;
813   oc_mv                  *frag_mvs;
814   const unsigned char    *map_idxs;
815   int                     map_nidxs;
816   oc_mv                   last_mv;
817   oc_mv                   prior_mv;
818   oc_mv                   cbmvs[4];
819   size_t                  nmbs;
820   size_t                  mbi;
821   long                    val;
822   set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
823   val=oc_pack_read1(&_dec->opb);
824   mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
825   map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
826   map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
827   prior_mv=last_mv=0;
828   frags=_dec->state.frags;
829   frag_mvs=_dec->state.frag_mvs;
830   mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
831   mb_modes=_dec->state.mb_modes;
832   nmbs=_dec->state.nmbs;
833   for(mbi=0;mbi<nmbs;mbi++){
834     int mb_mode;
835     mb_mode=mb_modes[mbi];
836     if(mb_mode!=OC_MODE_INVALID){
837       oc_mv     mbmv;
838       ptrdiff_t fragi;
839       int       mapi;
840       int       mapii;
841       int       refi;
842       if(mb_mode==OC_MODE_INTER_MV_FOUR){
843         oc_mv lbmvs[4];
844         int   bi;
845         prior_mv=last_mv;
846         for(bi=0;bi<4;bi++){
847           fragi=mb_maps[mbi][0][bi];
848           if(frags[fragi].coded){
849             frags[fragi].refi=OC_FRAME_PREV;
850             frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
851             lbmvs[bi]=last_mv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
852             frag_mvs[fragi]=lbmvs[bi];
853           }
854           else lbmvs[bi]=0;
855         }
856         (*set_chroma_mvs)(cbmvs,lbmvs);
857         for(mapii=4;mapii<map_nidxs;mapii++){
858           mapi=map_idxs[mapii];
859           bi=mapi&3;
860           fragi=mb_maps[mbi][mapi>>2][bi];
861           if(frags[fragi].coded){
862             frags[fragi].refi=OC_FRAME_PREV;
863             frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
864             frag_mvs[fragi]=cbmvs[bi];
865           }
866         }
867       }
868       else{
869         switch(mb_mode){
870           case OC_MODE_INTER_MV:{
871             prior_mv=last_mv;
872             last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
873           }break;
874           case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
875           case OC_MODE_INTER_MV_LAST2:{
876             mbmv=prior_mv;
877             prior_mv=last_mv;
878             last_mv=mbmv;
879           }break;
880           case OC_MODE_GOLDEN_MV:{
881             mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
882           }break;
883           default:mbmv=0;break;
884         }
885         /*Fill in the MVs for the fragments.*/
886         refi=OC_FRAME_FOR_MODE(mb_mode);
887         mapii=0;
888         do{
889           mapi=map_idxs[mapii];
890           fragi=mb_maps[mbi][mapi>>2][mapi&3];
891           if(frags[fragi].coded){
892             frags[fragi].refi=refi;
893             frags[fragi].mb_mode=mb_mode;
894             frag_mvs[fragi]=mbmv;
895           }
896         }
897         while(++mapii<map_nidxs);
898       }
899     }
900   }
901 }
902
903 static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
904   oc_fragment     *frags;
905   const ptrdiff_t *coded_fragis;
906   ptrdiff_t        ncoded_fragis;
907   ptrdiff_t        fragii;
908   ptrdiff_t        fragi;
909   ncoded_fragis=_dec->state.ntotal_coded_fragis;
910   if(ncoded_fragis<=0)return;
911   frags=_dec->state.frags;
912   coded_fragis=_dec->state.coded_fragis;
913   if(_dec->state.nqis==1){
914     /*If this frame has only a single qi value, then just use it for all coded
915        fragments.*/
916     for(fragii=0;fragii<ncoded_fragis;fragii++){
917       frags[coded_fragis[fragii]].qii=0;
918     }
919   }
920   else{
921     long val;
922     int  flag;
923     int  nqi1;
924     int  run_count;
925     /*Otherwise, we decode a qi index for each fragment, using two passes of
926       the same binary RLE scheme used for super-block coded bits.
927      The first pass marks each fragment as having a qii of 0 or greater than
928       0, and the second pass (if necessary), distinguishes between a qii of
929       1 and 2.
930      At first we just store the qii in the fragment.
931      After all the qii's are decoded, we make a final pass to replace them
932       with the corresponding qi's for this frame.*/
933     val=oc_pack_read1(&_dec->opb);
934     flag=(int)val;
935     nqi1=0;
936     fragii=0;
937     while(fragii<ncoded_fragis){
938       int full_run;
939       run_count=oc_sb_run_unpack(&_dec->opb);
940       full_run=run_count>=4129;
941       do{
942         frags[coded_fragis[fragii++]].qii=flag;
943         nqi1+=flag;
944       }
945       while(--run_count>0&&fragii<ncoded_fragis);
946       if(full_run&&fragii<ncoded_fragis){
947         val=oc_pack_read1(&_dec->opb);
948         flag=(int)val;
949       }
950       else flag=!flag;
951     }
952     /*TODO: run_count should be 0 here.
953       If it's not, we should issue a warning of some kind.*/
954     /*If we have 3 different qi's for this frame, and there was at least one
955        fragment with a non-zero qi, make the second pass.*/
956     if(_dec->state.nqis==3&&nqi1>0){
957       /*Skip qii==0 fragments.*/
958       for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++);
959       val=oc_pack_read1(&_dec->opb);
960       flag=(int)val;
961       do{
962         int full_run;
963         run_count=oc_sb_run_unpack(&_dec->opb);
964         full_run=run_count>=4129;
965         for(;fragii<ncoded_fragis;fragii++){
966           fragi=coded_fragis[fragii];
967           if(frags[fragi].qii==0)continue;
968           if(run_count--<=0)break;
969           frags[fragi].qii+=flag;
970         }
971         if(full_run&&fragii<ncoded_fragis){
972           val=oc_pack_read1(&_dec->opb);
973           flag=(int)val;
974         }
975         else flag=!flag;
976       }
977       while(fragii<ncoded_fragis);
978       /*TODO: run_count should be 0 here.
979         If it's not, we should issue a warning of some kind.*/
980     }
981   }
982 }
983
984
985
986 /*Unpacks the DC coefficient tokens.
987   Unlike when unpacking the AC coefficient tokens, we actually need to decode
988    the DC coefficient values now so that we can do DC prediction.
989   _huff_idx:   The index of the Huffman table to use for each color plane.
990   _ntoks_left: The number of tokens left to be decoded in each color plane for
991                 each coefficient.
992                This is updated as EOB tokens and zero run tokens are decoded.
993   Return: The length of any outstanding EOB run.*/
994 static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
995  ptrdiff_t _ntoks_left[3][64]){
996   unsigned char   *dct_tokens;
997   oc_fragment     *frags;
998   const ptrdiff_t *coded_fragis;
999   ptrdiff_t        ncoded_fragis;
1000   ptrdiff_t        fragii;
1001   ptrdiff_t        eobs;
1002   ptrdiff_t        ti;
1003   int              pli;
1004   dct_tokens=_dec->dct_tokens;
1005   frags=_dec->state.frags;
1006   coded_fragis=_dec->state.coded_fragis;
1007   ncoded_fragis=fragii=eobs=ti=0;
1008   for(pli=0;pli<3;pli++){
1009     ptrdiff_t run_counts[64];
1010     ptrdiff_t eob_count;
1011     ptrdiff_t eobi;
1012     int       rli;
1013     ncoded_fragis+=_dec->state.ncoded_fragis[pli];
1014     memset(run_counts,0,sizeof(run_counts));
1015     _dec->eob_runs[pli][0]=eobs;
1016     _dec->ti0[pli][0]=ti;
1017     /*Continue any previous EOB run, if there was one.*/
1018     eobi=eobs;
1019     if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
1020     eob_count=eobi;
1021     eobs-=eobi;
1022     while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1023     while(fragii<ncoded_fragis){
1024       int token;
1025       int cw;
1026       int eb;
1027       int skip;
1028       token=oc_huff_token_decode(&_dec->opb,
1029        _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1030       dct_tokens[ti++]=(unsigned char)token;
1031       if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1032         eb=(int)oc_pack_read(&_dec->opb,
1033          OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1034         dct_tokens[ti++]=(unsigned char)eb;
1035         if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1036         eb<<=OC_DCT_TOKEN_EB_POS(token);
1037       }
1038       else eb=0;
1039       cw=OC_DCT_CODE_WORD[token]+eb;
1040       eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1041       if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
1042       if(eobs){
1043         eobi=OC_MINI(eobs,ncoded_fragis-fragii);
1044         eob_count+=eobi;
1045         eobs-=eobi;
1046         while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1047       }
1048       else{
1049         int coeff;
1050         skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1051         cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1052         coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1053         if(skip)coeff=0;
1054         run_counts[skip]++;
1055         frags[coded_fragis[fragii++]].dc=coeff;
1056       }
1057     }
1058     /*Add the total EOB count to the longest run length.*/
1059     run_counts[63]+=eob_count;
1060     /*And convert the run_counts array to a moment table.*/
1061     for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1062     /*Finally, subtract off the number of coefficients that have been
1063        accounted for by runs started in this coefficient.*/
1064     for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
1065   }
1066   _dec->dct_tokens_count=ti;
1067   return eobs;
1068 }
1069
1070 /*Unpacks the AC coefficient tokens.
1071   This can completely discard coefficient values while unpacking, and so is
1072    somewhat simpler than unpacking the DC coefficient tokens.
1073   _huff_idx:   The index of the Huffman table to use for each color plane.
1074   _ntoks_left: The number of tokens left to be decoded in each color plane for
1075                 each coefficient.
1076                This is updated as EOB tokens and zero run tokens are decoded.
1077   _eobs:       The length of any outstanding EOB run from previous
1078                 coefficients.
1079   Return: The length of any outstanding EOB run.*/
1080 static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
1081  ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
1082   unsigned char *dct_tokens;
1083   ptrdiff_t      ti;
1084   int            pli;
1085   dct_tokens=_dec->dct_tokens;
1086   ti=_dec->dct_tokens_count;
1087   for(pli=0;pli<3;pli++){
1088     ptrdiff_t run_counts[64];
1089     ptrdiff_t eob_count;
1090     size_t    ntoks_left;
1091     size_t    ntoks;
1092     int       rli;
1093     _dec->eob_runs[pli][_zzi]=_eobs;
1094     _dec->ti0[pli][_zzi]=ti;
1095     ntoks_left=_ntoks_left[pli][_zzi];
1096     memset(run_counts,0,sizeof(run_counts));
1097     eob_count=0;
1098     ntoks=0;
1099     while(ntoks+_eobs<ntoks_left){
1100       int token;
1101       int cw;
1102       int eb;
1103       int skip;
1104       ntoks+=_eobs;
1105       eob_count+=_eobs;
1106       token=oc_huff_token_decode(&_dec->opb,
1107        _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1108       dct_tokens[ti++]=(unsigned char)token;
1109       if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1110         eb=(int)oc_pack_read(&_dec->opb,
1111          OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1112         dct_tokens[ti++]=(unsigned char)eb;
1113         if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1114         eb<<=OC_DCT_TOKEN_EB_POS(token);
1115       }
1116       else eb=0;
1117       cw=OC_DCT_CODE_WORD[token]+eb;
1118       skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1119       _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1120       if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
1121       if(_eobs==0){
1122         run_counts[skip]++;
1123         ntoks++;
1124       }
1125     }
1126     /*Add the portion of the last EOB run actually used by this coefficient.*/
1127     eob_count+=ntoks_left-ntoks;
1128     /*And remove it from the remaining EOB count.*/
1129     _eobs-=ntoks_left-ntoks;
1130     /*Add the total EOB count to the longest run length.*/
1131     run_counts[63]+=eob_count;
1132     /*And convert the run_counts array to a moment table.*/
1133     for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1134     /*Finally, subtract off the number of coefficients that have been
1135        accounted for by runs started in this coefficient.*/
1136     for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
1137   }
1138   _dec->dct_tokens_count=ti;
1139   return _eobs;
1140 }
1141
1142 /*Tokens describing the DCT coefficients that belong to each fragment are
1143    stored in the bitstream grouped by coefficient, not by fragment.
1144
1145   This means that we either decode all the tokens in order, building up a
1146    separate coefficient list for each fragment as we go, and then go back and
1147    do the iDCT on each fragment, or we have to create separate lists of tokens
1148    for each coefficient, so that we can pull the next token required off the
1149    head of the appropriate list when decoding a specific fragment.
1150
1151   The former was VP3's choice, and it meant 2*w*h extra storage for all the
1152    decoded coefficient values.
1153
1154   We take the second option, which lets us store just one to three bytes per
1155    token (generally far fewer than the number of coefficients, due to EOB
1156    tokens and zero runs), and which requires us to only maintain a counter for
1157    each of the 64 coefficients, instead of a counter for every fragment to
1158    determine where the next token goes.
1159
1160   We actually use 3 counters per coefficient, one for each color plane, so we
1161    can decode all color planes simultaneously.
1162   This lets color conversion, etc., be done as soon as a full MCU (one or
1163    two super block rows) is decoded, while the image data is still in cache.*/
1164
1165 static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
1166   static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
1167   ptrdiff_t  ntoks_left[3][64];
1168   int        huff_idxs[2];
1169   ptrdiff_t  eobs;
1170   long       val;
1171   int        pli;
1172   int        zzi;
1173   int        hgi;
1174   for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
1175     ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
1176   }
1177   val=oc_pack_read(&_dec->opb,4);
1178   huff_idxs[0]=(int)val;
1179   val=oc_pack_read(&_dec->opb,4);
1180   huff_idxs[1]=(int)val;
1181   _dec->eob_runs[0][0]=0;
1182   eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
1183 #if defined(HAVE_CAIRO)
1184   _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
1185 #endif
1186   val=oc_pack_read(&_dec->opb,4);
1187   huff_idxs[0]=(int)val;
1188   val=oc_pack_read(&_dec->opb,4);
1189   huff_idxs[1]=(int)val;
1190   zzi=1;
1191   for(hgi=1;hgi<5;hgi++){
1192     huff_idxs[0]+=16;
1193     huff_idxs[1]+=16;
1194     for(;zzi<OC_HUFF_LIST_MAX[hgi];zzi++){
1195       eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
1196     }
1197   }
1198   /*TODO: eobs should be exactly zero, or 4096 or greater.
1199     The second case occurs when an EOB run of size zero is encountered, which
1200      gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
1201     If neither of these conditions holds, then a warning should be issued.*/
1202 }
1203
1204
1205 static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
1206   /*pp_level 0: disabled; free any memory used and return*/
1207   if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
1208     if(_dec->dc_qis!=NULL){
1209       _ogg_free(_dec->dc_qis);
1210       _dec->dc_qis=NULL;
1211       _ogg_free(_dec->variances);
1212       _dec->variances=NULL;
1213       _ogg_free(_dec->pp_frame_data);
1214       _dec->pp_frame_data=NULL;
1215     }
1216     return 1;
1217   }
1218   if(_dec->dc_qis==NULL){
1219     /*If we haven't been tracking DC quantization indices, there's no point in
1220        starting now.*/
1221     if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
1222     _dec->dc_qis=(unsigned char *)_ogg_malloc(
1223      _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
1224     if(_dec->dc_qis==NULL)return 1;
1225     memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
1226   }
1227   else{
1228     unsigned char   *dc_qis;
1229     const ptrdiff_t *coded_fragis;
1230     ptrdiff_t        ncoded_fragis;
1231     ptrdiff_t        fragii;
1232     unsigned char    qi0;
1233     /*Update the DC quantization index of each coded block.*/
1234     dc_qis=_dec->dc_qis;
1235     coded_fragis=_dec->state.coded_fragis;
1236     ncoded_fragis=_dec->state.ncoded_fragis[0]+
1237      _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
1238     qi0=(unsigned char)_dec->state.qis[0];
1239     for(fragii=0;fragii<ncoded_fragis;fragii++){
1240       dc_qis[coded_fragis[fragii]]=qi0;
1241     }
1242   }
1243   /*pp_level 1: Stop after updating DC quantization indices.*/
1244   if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
1245     if(_dec->variances!=NULL){
1246       _ogg_free(_dec->variances);
1247       _dec->variances=NULL;
1248       _ogg_free(_dec->pp_frame_data);
1249       _dec->pp_frame_data=NULL;
1250     }
1251     return 1;
1252   }
1253   if(_dec->variances==NULL){
1254     size_t frame_sz;
1255     size_t c_sz;
1256     int    c_w;
1257     int    c_h;
1258     frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
1259     c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
1260     c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
1261     c_sz=c_w*(size_t)c_h;
1262     /*Allocate space for the chroma planes, even if we're not going to use
1263        them; this simplifies allocation state management, though it may waste
1264        memory on the few systems that don't overcommit pages.*/
1265     frame_sz+=c_sz<<1;
1266     _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
1267      frame_sz*sizeof(_dec->pp_frame_data[0]));
1268     _dec->variances=(int *)_ogg_malloc(
1269      _dec->state.nfrags*sizeof(_dec->variances[0]));
1270     if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
1271       _ogg_free(_dec->pp_frame_data);
1272       _dec->pp_frame_data=NULL;
1273       _ogg_free(_dec->variances);
1274       _dec->variances=NULL;
1275       return 1;
1276     }
1277     /*Force an update of the PP buffer pointers.*/
1278     _dec->pp_frame_state=0;
1279   }
1280   /*Update the PP buffer pointers if necessary.*/
1281   if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
1282     if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
1283       /*If chroma processing is disabled, just use the PP luma plane.*/
1284       _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
1285       _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
1286       _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
1287       _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
1288        (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
1289     }
1290     else{
1291       size_t y_sz;
1292       size_t c_sz;
1293       int    c_w;
1294       int    c_h;
1295       /*Otherwise, set up pointers to all three PP planes.*/
1296       y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
1297       c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
1298       c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
1299       c_sz=c_w*(size_t)c_h;
1300       _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
1301       _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
1302       _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
1303       _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
1304       _dec->pp_frame_buf[1].width=c_w;
1305       _dec->pp_frame_buf[1].height=c_h;
1306       _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
1307       _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
1308       _dec->pp_frame_buf[2].width=c_w;
1309       _dec->pp_frame_buf[2].height=c_h;
1310       _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
1311       _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
1312       oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
1313     }
1314     _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
1315   }
1316   /*If we're not processing chroma, copy the reference frame's chroma planes.*/
1317   if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
1318     memcpy(_dec->pp_frame_buf+1,
1319      _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
1320      sizeof(_dec->pp_frame_buf[1])*2);
1321   }
1322   return 0;
1323 }
1324
1325
1326 /*Initialize the main decoding pipeline.*/
1327 static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
1328  oc_dec_pipeline_state *_pipe){
1329   const ptrdiff_t *coded_fragis;
1330   const ptrdiff_t *uncoded_fragis;
1331   int              flimit;
1332   int              pli;
1333   int              qii;
1334   int              qti;
1335   int              zzi;
1336   /*If chroma is sub-sampled in the vertical direction, we have to decode two
1337      super block rows of Y' for each super block row of Cb and Cr.*/
1338   _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
1339   /*Initialize the token and extra bits indices for each plane and
1340      coefficient.*/
1341   memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
1342   /*Also copy over the initial the EOB run counts.*/
1343   memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
1344   /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
1345   coded_fragis=_dec->state.coded_fragis;
1346   uncoded_fragis=coded_fragis+_dec->state.nfrags;
1347   for(pli=0;pli<3;pli++){
1348     ptrdiff_t ncoded_fragis;
1349     _pipe->coded_fragis[pli]=coded_fragis;
1350     _pipe->uncoded_fragis[pli]=uncoded_fragis;
1351     ncoded_fragis=_dec->state.ncoded_fragis[pli];
1352     coded_fragis+=ncoded_fragis;
1353     uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
1354   }
1355   /*Set up condensed quantizer tables.*/
1356   for(pli=0;pli<3;pli++){
1357     for(qii=0;qii<_dec->state.nqis;qii++){
1358       for(qti=0;qti<2;qti++){
1359         _pipe->dequant[pli][qii][qti]=
1360          _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
1361       }
1362     }
1363   }
1364   /*Set the previous DC predictor to 0 for all color planes and frame types.*/
1365   memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
1366   /*Initialize the bounding value array for the loop filter.*/
1367   flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
1368   _pipe->loop_filter=flimit!=0;
1369   if(flimit!=0)oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
1370   /*Initialize any buffers needed for post-processing.
1371     We also save the current post-processing level, to guard against the user
1372      changing it from a callback.*/
1373   if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
1374   /*If we don't have enough information to post-process, disable it, regardless
1375      of the user-requested level.*/
1376   else{
1377     _pipe->pp_level=OC_PP_LEVEL_DISABLED;
1378     memcpy(_dec->pp_frame_buf,
1379      _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
1380      sizeof(_dec->pp_frame_buf[0])*3);
1381   }
1382   /*Clear down the DCT coefficient buffer for the first block.*/
1383   for(zzi=0;zzi<64;zzi++)_pipe->dct_coeffs[zzi]=0;
1384 }
1385
/*Undo the DC prediction in a single plane of an MCU (one or two super block
   rows).
  Each coded fragment's stored DC value is a residual; the predictor computed
   from its neighbors (or from the last DC seen for the same reference frame)
   is added back in place.
  As a side effect, the number of coded and uncoded fragments in this plane of
   the MCU is also computed.
  _dec:  The decoder context; fragment DC values are updated in place.
  _pipe: The pipeline state; supplies the fragment row range and the running
          per-reference-frame predictors, and receives the fragment counts.
  _pli:  The color plane to process.*/
void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  const oc_fragment_plane *fplane;
  oc_fragment             *frags;
  int                     *pred_last;
  ptrdiff_t                ncoded_fragis;
  ptrdiff_t                fragi;
  int                      fragx;
  int                      fragy;
  int                      fragy0;
  int                      fragy_end;
  int                      nhfrags;
  /*Compute the first and last fragment row of the current MCU for this
     plane.*/
  fplane=_dec->state.fplanes+_pli;
  fragy0=_pipe->fragy0[_pli];
  fragy_end=_pipe->fragy_end[_pli];
  nhfrags=fplane->nhfrags;
  /*pred_last is indexed by reference frame and persists across calls.*/
  pred_last=_pipe->pred_last[_pli];
  frags=_dec->state.frags;
  ncoded_fragis=0;
  /*Start at the first fragment of row fragy0 in this plane.*/
  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
  for(fragy=fragy0;fragy<fragy_end;fragy++){
    if(fragy==0){
      /*For the first row, all of the cases reduce to just using the previous
         predictor for the same reference frame.*/
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        if(frags[fragi].coded){
          int refi;
          refi=frags[fragi].refi;
          pred_last[refi]=frags[fragi].dc+=pred_last[refi];
          ncoded_fragis++;
        }
      }
    }
    else{
      oc_fragment *u_frags;
      int          l_ref;
      int          ul_ref;
      int          u_ref;
      /*u_frags is offset by one row, so u_frags[fragi] is the fragment
         directly above frags[fragi].*/
      u_frags=frags-nhfrags;
      /*At the start of a row there is no left or upper-left neighbor; -1
         never compares equal to the refi of the current fragment.*/
      l_ref=-1;
      ul_ref=-1;
      u_ref=u_frags[fragi].refi;
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        int ur_ref;
        /*Likewise there is no upper-right neighbor in the last column.*/
        if(fragx+1>=nhfrags)ur_ref=-1;
        else ur_ref=u_frags[fragi+1].refi;
        if(frags[fragi].coded){
          int pred;
          int refi;
          refi=frags[fragi].refi;
          /*We break out a separate case based on which of our neighbors use
             the same reference frames.
            This is somewhat faster than trying to make a generic case which
             handles all of them, since it reduces lots of poorly predicted
             jumps to one switch statement, and also lets a number of the
             multiplications be optimized out by strength reduction.
            The switch value is a bit mask: bit 0 is the left neighbor, bit 1
             the upper-left, bit 2 the upper, and bit 3 the upper-right.*/
          switch((l_ref==refi)|(ul_ref==refi)<<1|
           (u_ref==refi)<<2|(ur_ref==refi)<<3){
            /*No usable neighbor (mask 0): fall back to the last DC value
               seen for this reference frame.*/
            default:pred=pred_last[refi];break;
            case  1:
            case  3:pred=frags[fragi-1].dc;break;
            case  2:pred=u_frags[fragi-1].dc;break;
            case  4:
            case  6:
            case 12:pred=u_frags[fragi].dc;break;
            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
            case  8:pred=u_frags[fragi+1].dc;break;
            case  9:
            case 11:
            case 13:{
              /*The TI compiler mis-compiles this line.*/
              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
            }break;
            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
            case 14:{
              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
               +10*u_frags[fragi].dc)/16;
            }break;
            case  7:
            case 15:{
              int p0;
              int p1;
              int p2;
              p0=frags[fragi-1].dc;
              p1=u_frags[fragi-1].dc;
              p2=u_frags[fragi].dc;
              pred=(29*(p0+p2)-26*p1)/32;
              /*If the weighted prediction is more than 128 away from a
                 neighbor, use that neighbor's value instead; up is checked
                 first, then left, then upper-left.*/
              if(abs(pred-p2)>128)pred=p2;
              else if(abs(pred-p0)>128)pred=p0;
              else if(abs(pred-p1)>128)pred=p1;
            }break;
          }
          pred_last[refi]=frags[fragi].dc+=pred;
          ncoded_fragis++;
          l_ref=refi;
        }
        /*An uncoded fragment cannot serve as the left neighbor of the next
           one.*/
        else l_ref=-1;
        /*Slide the upper-row window one fragment to the right.*/
        ul_ref=u_ref;
        u_ref=ur_ref;
      }
    }
  }
  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
  /*Also save the number of uncoded fragments so we know how many to copy.*/
  _pipe->nuncoded_fragis[_pli]=
   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
}
1499
/*Reconstructs all coded fragments in a single MCU (one or two super block
   rows).
  This requires that each coded fragment have a proper macro block mode and
   motion vector (if not in INTRA mode), and have its DC value decoded, with
   the DC prediction process reversed, and the number of coded and uncoded
   fragments in this plane of the MCU be counted.
  The token lists for each color plane and coefficient should also be filled
   in, along with initial token offsets, extra bits offsets, and EOB run
   counts.
  _dec:  The decoder context.
  _pipe: The pipeline state; its per-plane token offsets, EOB runs and
          fragment list pointers are advanced as fragments are consumed.
  _pli:  The color plane to reconstruct.*/
static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  unsigned char       *dct_tokens;
  const unsigned char *dct_fzig_zag;
  ogg_uint16_t         dc_quant[2];
  const oc_fragment   *frags;
  const ptrdiff_t     *coded_fragis;
  ptrdiff_t            ncoded_fragis;
  ptrdiff_t            fragii;
  ptrdiff_t           *ti;
  ptrdiff_t           *eob_runs;
  int                  qti;
  dct_tokens=_dec->dct_tokens;
  dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
  frags=_dec->state.frags;
  coded_fragis=_pipe->coded_fragis[_pli];
  ncoded_fragis=_pipe->ncoded_fragis[_pli];
  /*ti[zzi] and eob_runs[zzi] are the current token offset and remaining EOB
     run for coefficient zzi of this plane.*/
  ti=_pipe->ti[_pli];
  eob_runs=_pipe->eob_runs[_pli];
  /*The DC quantizer always comes from the first qi (qii 0), one value for
     each of the two qti values.*/
  for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
  for(fragii=0;fragii<ncoded_fragis;fragii++){
    const ogg_uint16_t *ac_quant;
    ptrdiff_t           fragi;
    int                 last_zzi;
    int                 zzi;
    fragi=coded_fragis[fragii];
    /*qti is 0 for INTRA fragments and 1 for everything else.*/
    qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
    ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
    /*Decode the AC coefficients.*/
    for(zzi=0;zzi<64;){
      int token;
      /*Remember where the most recent token (or EOB) started.*/
      last_zzi=zzi;
      if(eob_runs[zzi]){
        /*An EOB run is active at this coefficient: this fragment uses up one
           entry of it and has no further coefficients.*/
        eob_runs[zzi]--;
        break;
      }
      else{
        ptrdiff_t eob;
        int       cw;
        int       rlen;
        int       coeff;
        int       lti;
        lti=ti[zzi];
        token=dct_tokens[lti++];
        cw=OC_DCT_CODE_WORD[token];
        /*These parts could be done branchless, but the branches are fairly
           predictable and the C code translates into more than a few
           instructions, so it's worth it to avoid them.*/
        if(OC_DCT_TOKEN_NEEDS_MORE(token)){
          cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
        }
        eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
        if(token==OC_DCT_TOKEN_FAT_EOB){
          /*This token stores a second byte of extra bits, which supplies
             the high bits of the run length; a total of zero means the run
             lasts for the rest of the frame.*/
          eob+=dct_tokens[lti++]<<8;
          if(eob==0)eob=OC_DCT_EOB_FINISH;
        }
        rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
        /*Conditionally negate the code word according to its flip bit
           before extracting the coefficient magnitude.*/
        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
        eob_runs[zzi]=eob;
        ti[zzi]=lti;
        /*Skip over the zero run, then store the dequantized coefficient at
           its de-zig-zagged position.*/
        zzi+=rlen;
        _pipe->dct_coeffs[dct_fzig_zag[zzi]]=
         (ogg_int16_t)(coeff*(int)ac_quant[zzi]);
        /*Only advance past this position if the token was not an EOB; an
           EOB is picked up by the eob_runs test on the next iteration.*/
        zzi+=!eob;
      }
    }
    /*TODO: zzi should be exactly 64 here.
      If it's not, we should report some kind of warning.*/
    zzi=OC_MINI(zzi,64);
    /*The DC value was decoded and unpredicted earlier; it replaces whatever
       the token walk stored at index 0.*/
    _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
    /*last_zzi is always initialized.
      If your compiler thinks otherwise, it is dumb.*/
    oc_state_frag_recon(&_dec->state,fragi,_pli,
     _pipe->dct_coeffs,last_zzi,dc_quant[qti]);
  }
  /*Advance past the coded fragments consumed by this MCU.*/
  _pipe->coded_fragis[_pli]+=ncoded_fragis;
  /*Right now the reconstructed MCU has only the coded blocks in it.*/
  /*TODO: We make the decision here to always copy the uncoded blocks into it
     from the reference frame.
    We could also copy the coded blocks back over the reference frame, if we
     wait for an additional MCU to be decoded, which might be faster if only a
     small number of blocks are coded.
    However, this introduces more latency, creating a larger cache footprint.
    It's unknown which decision is better, but this one results in simpler
     code, and the hard case (high bitrate, high resolution) is handled
     correctly.*/
  /*Copy the uncoded blocks from the previous reference frame.*/
  if(_pipe->nuncoded_fragis[_pli]>0){
    /*The uncoded list pointer is moved down by this MCU's count before the
       copy, so it then points at this MCU's uncoded fragments.*/
    _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
    oc_frag_copy_list(&_dec->state,
     _dec->state.ref_frame_data[OC_FRAME_SELF],
     _dec->state.ref_frame_data[OC_FRAME_PREV],
     _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
     _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
  }
}
1606
/*Filter a horizontal block edge.
  Each of the 8 columns is handled on its own: 10 vertically consecutive
   pixels are loaded from _src (one per row, _src_ystride apart) and 8 pixels
   are stored to _dst (one per row, _dst_ystride apart).
  The stored pixels correspond to loaded rows 1...8.
  _dst:         The first of the 8 output rows.
  _dst_ystride: The distance between output rows.
  _src:         The first of the 10 input rows.
  _src_ystride: The distance between input rows.
  _qstep:       The exclusive limit on the step across the edge (between
                 loaded rows 4 and 5) for a column to be smoothed.
  _flimit:      The exclusive limit on the activity on either side of the
                 edge for a column to be smoothed.
  _variance0:   Accumulates the activity of loaded rows 0...4, capped at 255
                 per column.
  _variance1:   Accumulates the activity of loaded rows 5...9, capped at 255
                 per column.*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  int col;
  for(col=0;col<8;col++){
    const unsigned char *in;
    unsigned char       *out;
    int                  p[10];
    int                  act0;
    int                  act1;
    int                  edge;
    int                  i;
    /*Gather the whole column before anything is written.*/
    in=_src+col;
    for(i=0;i<10;i++){
      p[i]=*in;
      in+=_src_ystride;
    }
    /*Sum the absolute differences on each side of the edge.*/
    act0=act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    *_variance0+=OC_MINI(255,act0);
    *_variance1+=OC_MINI(255,act1);
    edge=p[5]-p[4];
    out=_dst+col;
    if(act0>=_flimit||act1>=_flimit||edge>=_qstep||-edge>=_qstep){
      /*Too much activity or too large a step: pass rows 1...8 through.*/
      for(i=1;i<=8;i++){
        *out=(unsigned char)p[i];
        out+=_dst_ystride;
      }
    }
    else{
      /*Smooth across the edge.
        The first two and last two taps reuse the outermost loaded pixel in
         place of the ones that were not loaded.*/
      *out=(unsigned char)((p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3);
      out+=_dst_ystride;
      *out=(unsigned char)((p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3);
      out+=_dst_ystride;
      for(i=0;i<4;i++){
        *out=(unsigned char)((p[i]+p[i+1]+p[i+2]+p[i+3]*2+
         p[i+4]+p[i+5]+p[i+6]+4)>>3);
        out+=_dst_ystride;
      }
      *out=(unsigned char)((p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3);
      out+=_dst_ystride;
      *out=(unsigned char)((p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3);
    }
  }
}
1660
/*Filter a vertical block edge, in place.
  Each of the 8 rows is handled on its own: 10 horizontally consecutive
   pixels are loaded, starting one pixel to the left of _dst, and the 8 pixels
   starting at _dst are overwritten only if the row passes the smoothing
   test.
  Rows that fail the test are left untouched.
  _dst:         The first pixel to (possibly) overwrite in the first row.
  _dst_ystride: The distance between rows.
  _qstep:       The exclusive limit on the step across the edge (between
                 loaded pixels 4 and 5) for a row to be smoothed.
  _flimit:      The exclusive limit on the activity on either side of the
                 edge for a row to be smoothed.
  _variances:   _variances[0] accumulates the activity of loaded pixels
                 0...4 and _variances[1] that of loaded pixels 5...9, each
                 capped at 255 per row.*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  unsigned char *row;
  int            line;
  row=_dst;
  for(line=0;line<8;line++){
    int p[10];
    int act0;
    int act1;
    int edge;
    int i;
    /*Load everything up front, since the row is rewritten in place.*/
    for(i=0;i<10;i++)p[i]=row[i-1];
    act0=act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    _variances[0]+=OC_MINI(255,act0);
    _variances[1]+=OC_MINI(255,act1);
    edge=p[5]-p[4];
    if(act0<_flimit&&act1<_flimit&&edge<_qstep&&-edge<_qstep){
      row[0]=(unsigned char)((p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3);
      row[1]=(unsigned char)((p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3);
      for(i=0;i<4;i++){
        row[i+2]=(unsigned char)((p[i]+p[i+1]+p[i+2]+p[i+3]*2+
         p[i+4]+p[i+5]+p[i+6]+4)>>3);
      }
      row[6]=(unsigned char)((p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3);
      row[7]=(unsigned char)((p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3);
    }
    row+=_dst_ystride;
  }
}
1697
1698 static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
1699  th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
1700  int _fragy_end){
1701   oc_fragment_plane   *fplane;
1702   int                 *variance;
1703   unsigned char       *dc_qi;
1704   unsigned char       *dst;
1705   const unsigned char *src;
1706   ptrdiff_t            froffset;
1707   int                  dst_ystride;
1708   int                  src_ystride;
1709   int                  nhfrags;
1710   int                  width;
1711   int                  notstart;
1712   int                  notdone;
1713   int                  flimit;
1714   int                  qstep;
1715   int                  y_end;
1716   int                  y;
1717   int                  x;
1718   _dst+=_pli;
1719   _src+=_pli;
1720   fplane=_dec->state.fplanes+_pli;
1721   nhfrags=fplane->nhfrags;
1722   froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
1723   variance=_dec->variances+froffset;
1724   dc_qi=_dec->dc_qis+froffset;
1725   notstart=_fragy0>0;
1726   notdone=_fragy_end<fplane->nvfrags;
1727   /*We want to clear an extra row of variances, except at the end.*/
1728   memset(variance+(nhfrags&-notstart),0,
1729    (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
1730   /*Except for the first time, we want to point to the middle of the row.*/
1731   y=(_fragy0<<3)+(notstart<<2);
1732   dst_ystride=_dst->stride;
1733   src_ystride=_src->stride;
1734   dst=_dst->data+y*(ptrdiff_t)dst_ystride;
1735   src=_src->data+y*(ptrdiff_t)src_ystride;
1736   width=_dst->width;
1737   for(;y<4;y++){
1738     memcpy(dst,src,width*sizeof(dst[0]));
1739     dst+=dst_ystride;
1740     src+=src_ystride;
1741   }
1742   /*We also want to skip the last row in the frame for this loop.*/
1743   y_end=_fragy_end-!notdone<<3;
1744   for(;y<y_end;y+=8){
1745     qstep=_dec->pp_dc_scale[*dc_qi];
1746     flimit=(qstep*3)>>2;
1747     oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
1748      qstep,flimit,variance,variance+nhfrags);
1749     variance++;
1750     dc_qi++;
1751     for(x=8;x<width;x+=8){
1752       qstep=_dec->pp_dc_scale[*dc_qi];
1753       flimit=(qstep*3)>>2;
1754       oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
1755        qstep,flimit,variance,variance+nhfrags);
1756       oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
1757        qstep,flimit,variance-1);
1758       variance++;
1759       dc_qi++;
1760     }
1761     dst+=dst_ystride<<3;
1762     src+=src_ystride<<3;
1763   }
1764   /*And finally, handle the last row in the frame, if it's in the range.*/
1765   if(!notdone){
1766     int height;
1767     height=_dst->height;
1768     for(;y<height;y++){
1769       memcpy(dst,src,width*sizeof(dst[0]));
1770       dst+=dst_ystride;
1771       src+=src_ystride;
1772     }
1773     /*Filter the last row of vertical block edges.*/
1774     dc_qi++;
1775     for(x=8;x<width;x+=8){
1776       qstep=_dec->pp_dc_scale[*dc_qi++];
1777       flimit=(qstep*3)>>2;
1778       oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
1779        qstep,flimit,variance++);
1780     }
1781   }
1782 }
1783
/*Derings a single 8x8 block in place.
  Every pixel is replaced by a weighted average of itself and its four
   neighbors.
  The weight of a neighbor shrinks as the absolute difference across the
   corresponding pixel boundary grows, so strong edges are preserved.
  Since the block is rewritten in place, the neighbors above and to the left
   of a pixel inside the block have already been filtered when they are
   read.
  _idata:     The top-left pixel of the block.
  _ystride:   The distance between rows.
  _b:         Border flags: bit 0 is set if there is no pixel to the left of
               the block, bit 1 if there is none to the right, bit 2 if there
               is none above, and bit 3 if there is none below.
              A missing neighbor is replaced by the pixel itself.
  _dc_scale:  The offset added to every weight before clamping.
  _sharp_mod: The weight used when the difference is so large that the
               unclamped weight falls below -64.
  _strong:    0 or 1; selects the weight ceiling and how strongly
               differences reduce the weight.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  static const unsigned char OC_MOD_MAX[2]={24,32};
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *above;
  const unsigned char *cur;
  const unsigned char *below;
  const unsigned char *left;
  const unsigned char *colp;
  unsigned char       *out;
  int                  vmod[72];
  int                  hmod[72];
  int                  mod_hi;
  int                  shift;
  int                  row;
  int                  col;
  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
  shift=OC_MOD_SHIFT[_strong];
  /*Weights across the 9 horizontal boundaries (above row 0 through below
     row 7), stored row-major: vmod[boundary*8+column].*/
  cur=_idata;
  above=cur-(_ystride&-!(_b&4));
  for(row=0;row<9;row++){
    for(col=0;col<8;col++){
      int mod;
      mod=32+_dc_scale-(abs(cur[col]-above[col])<<shift);
      vmod[(row<<3)+col]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    above=cur;
    /*Step down, but do not step past the last row at the bottom border.*/
    cur+=_ystride&-(!(_b&8)|(row<7));
  }
  /*Weights across the 9 vertical boundaries (left of column 0 through right
     of column 7), stored column-major: hmod[boundary*8+row].*/
  colp=_idata;
  left=_idata-!(_b&1);
  for(col=0;col<9;col++){
    cur=colp;
    for(row=0;row<8;row++){
      int mod;
      mod=32+_dc_scale-(abs(*cur-*left)<<shift);
      hmod[(col<<3)+row]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
      left+=_ystride;
      cur+=_ystride;
    }
    left=colp;
    /*Step right, but do not step past the last column at the right
       border.*/
    colp+=!(_b&2)|(col<7);
  }
  /*Apply the weights, left to right and top to bottom.*/
  out=_idata;
  cur=_idata;
  above=cur-(_ystride&-!(_b&4));
  below=cur+_ystride;
  for(row=0;row<8;row++){
    for(col=0;col<8;col++){
      int lstep;
      int rstep;
      int a;
      int b;
      int w;
      /*Interior columns always have both horizontal neighbors; the outer
         ones only if the block is not on that border.*/
      lstep=col>0?1:!(_b&1);
      rstep=col<7?1:!(_b&2);
      a=128;
      b=64;
      w=hmod[(col<<3)+row];
      a-=w;
      b+=w*cur[col-lstep];
      w=vmod[(row<<3)+col];
      a-=w;
      b+=w*above[col];
      w=vmod[((row+1)<<3)+col];
      a-=w;
      b+=w*below[col];
      w=hmod[((col+1)<<3)+row];
      a-=w;
      b+=w*cur[col+rstep];
      out[col]=OC_CLAMP255((a*cur[col]+b)>>7);
    }
    out+=_ystride;
    above=cur;
    cur=below;
    below+=_ystride&-(!(_b&8)|(row<6));
  }
}
1884
1885 #define OC_DERING_THRESH1 (384)
1886 #define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
1887 #define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
1888 #define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
1889
1890 static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
1891  int _pli,int _fragy0,int _fragy_end){
1892   th_img_plane      *iplane;
1893   oc_fragment_plane *fplane;
1894   oc_fragment       *frag;
1895   int               *variance;
1896   unsigned char     *idata;
1897   ptrdiff_t          froffset;
1898   int                ystride;
1899   int                nhfrags;
1900   int                sthresh;
1901   int                strong;
1902   int                y_end;
1903   int                width;
1904   int                height;
1905   int                y;
1906   int                x;
1907   iplane=_img+_pli;
1908   fplane=_dec->state.fplanes+_pli;
1909   nhfrags=fplane->nhfrags;
1910   froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
1911   variance=_dec->variances+froffset;
1912   frag=_dec->state.frags+froffset;
1913   strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
1914   sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
1915   y=_fragy0<<3;
1916   ystride=iplane->stride;
1917   idata=iplane->data+y*(ptrdiff_t)ystride;
1918   y_end=_fragy_end<<3;
1919   width=iplane->width;
1920   height=iplane->height;
1921   for(;y<y_end;y+=8){
1922     for(x=0;x<width;x+=8){
1923       int b;
1924       int qi;
1925       int var;
1926       qi=_dec->state.qis[frag->qii];
1927       var=*variance;
1928       b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3;
1929       if(strong&&var>sthresh){
1930         oc_dering_block(idata+x,ystride,b,
1931          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1932         if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
1933          !(b&2)&&variance[1]>OC_DERING_THRESH4||
1934          !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
1935          !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){
1936           oc_dering_block(idata+x,ystride,b,
1937            _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1938           oc_dering_block(idata+x,ystride,b,
1939            _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1940         }
1941       }
1942       else if(var>OC_DERING_THRESH2){
1943         oc_dering_block(idata+x,ystride,b,
1944          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1945       }
1946       else if(var>OC_DERING_THRESH1){
1947         oc_dering_block(idata+x,ystride,b,
1948          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
1949       }
1950       frag++;
1951       variance++;
1952     }
1953     idata+=ystride<<3;
1954   }
1955 }
1956
1957
1958
1959 th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
1960   oc_dec_ctx *dec;
1961   if(_info==NULL||_setup==NULL)return NULL;
1962   dec=oc_aligned_malloc(sizeof(*dec),16);
1963   if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
1964     oc_aligned_free(dec);
1965     return NULL;
1966   }
1967   dec->state.curframe_num=0;
1968   return dec;
1969 }
1970
1971 void th_decode_free(th_dec_ctx *_dec){
1972   if(_dec!=NULL){
1973     oc_dec_clear(_dec);
1974     oc_aligned_free(_dec);
1975   }
1976 }
1977
1978 int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
1979  size_t _buf_sz){
1980   switch(_req){
1981   case TH_DECCTL_GET_PPLEVEL_MAX:{
1982     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
1983     if(_buf_sz!=sizeof(int))return TH_EINVAL;
1984     (*(int *)_buf)=OC_PP_LEVEL_MAX;
1985     return 0;
1986   }break;
1987   case TH_DECCTL_SET_PPLEVEL:{
1988     int pp_level;
1989     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
1990     if(_buf_sz!=sizeof(int))return TH_EINVAL;
1991     pp_level=*(int *)_buf;
1992     if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
1993     _dec->pp_level=pp_level;
1994     return 0;
1995   }break;
1996   case TH_DECCTL_SET_GRANPOS:{
1997     ogg_int64_t granpos;
1998     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
1999     if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
2000     granpos=*(ogg_int64_t *)_buf;
2001     if(granpos<0)return TH_EINVAL;
2002     _dec->state.granpos=granpos;
2003     _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
2004      -_dec->state.granpos_bias;
2005     _dec->state.curframe_num=_dec->state.keyframe_num
2006      +(granpos&(1<<_dec->state.info.keyframe_granule_shift)-1);
2007     return 0;
2008   }break;
2009   case TH_DECCTL_SET_STRIPE_CB:{
2010     th_stripe_callback *cb;
2011     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2012     if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
2013     cb=(th_stripe_callback *)_buf;
2014     _dec->stripe_cb.ctx=cb->ctx;
2015     _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
2016     return 0;
2017   }break;
2018 #ifdef HAVE_CAIRO
2019   case TH_DECCTL_SET_TELEMETRY_MBMODE:{
2020     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2021     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2022     _dec->telemetry=1;
2023     _dec->telemetry_mbmode=*(int *)_buf;
2024     return 0;
2025   }break;
2026   case TH_DECCTL_SET_TELEMETRY_MV:{
2027     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2028     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2029     _dec->telemetry=1;
2030     _dec->telemetry_mv=*(int *)_buf;
2031     return 0;
2032   }break;
2033   case TH_DECCTL_SET_TELEMETRY_QI:{
2034     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2035     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2036     _dec->telemetry=1;
2037     _dec->telemetry_qi=*(int *)_buf;
2038     return 0;
2039   }break;
2040   case TH_DECCTL_SET_TELEMETRY_BITS:{
2041     if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2042     if(_buf_sz!=sizeof(int))return TH_EINVAL;
2043     _dec->telemetry=1;
2044     _dec->telemetry_bits=*(int *)_buf;
2045     return 0;
2046   }break;
2047 #endif
2048   default:return TH_EIMPL;
2049   }
2050 }
2051
2052 /*We're decoding an INTER frame, but have no initialized reference
2053    buffers (i.e., decoding did not start on a key frame).
2054   We initialize them to a solid gray here.*/
2055 static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
2056   th_info   *info;
2057   size_t     yplane_sz;
2058   size_t     cplane_sz;
2059   ptrdiff_t  yoffset;
2060   int        yhstride;
2061   int        yheight;
2062   int        chstride;
2063   int        cheight;
2064   _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
2065   _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
2066   _dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
2067   _dec->state.ref_frame_data[OC_FRAME_GOLD]=
2068    _dec->state.ref_frame_data[OC_FRAME_PREV]=
2069    _dec->state.ref_frame_data[OC_FRAME_SELF]=
2070    _dec->state.ref_frame_bufs[0][0].data;
2071   memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
2072    sizeof(_dec->pp_frame_buf[0])*3);
2073   info=&_dec->state.info;
2074   yhstride=abs(_dec->state.ref_ystride[0]);
2075   yheight=info->frame_height+2*OC_UMV_PADDING;
2076   chstride=abs(_dec->state.ref_ystride[1]);
2077   cheight=yheight>>!(info->pixel_fmt&2);
2078   yplane_sz=yhstride*(size_t)yheight+16;
2079   cplane_sz=chstride*(size_t)cheight;
2080   yoffset=yhstride*(ptrdiff_t)(yheight-OC_UMV_PADDING-1)+OC_UMV_PADDING;
2081   memset(_dec->state.ref_frame_data[0]-yoffset,0x80,yplane_sz+2*cplane_sz);
2082 }
2083
2084 #if defined(HAVE_CAIRO)
2085 static void oc_render_telemetry(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr,
2086  int _telemetry){
2087   /*Stuff the plane into cairo.*/
2088   cairo_surface_t *cs;
2089   unsigned char   *data;
2090   unsigned char   *y_row;
2091   unsigned char   *u_row;
2092   unsigned char   *v_row;
2093   unsigned char   *rgb_row;
2094   int              cstride;
2095   int              w;
2096   int              h;
2097   int              x;
2098   int              y;
2099   int              hdec;
2100   int              vdec;
2101   w=_ycbcr[0].width;
2102   h=_ycbcr[0].height;
2103   hdec=!(_dec->state.info.pixel_fmt&1);
2104   vdec=!(_dec->state.info.pixel_fmt&2);
2105   /*Lazy data buffer init.
2106     We could try to re-use the post-processing buffer, which would save
2107      memory, but complicate the allocation logic there.
2108     I don't think anyone cares about memory usage when using telemetry; it is
2109      not meant for embedded devices.*/
2110   if(_dec->telemetry_frame_data==NULL){
2111     _dec->telemetry_frame_data=_ogg_malloc(
2112      (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
2113     if(_dec->telemetry_frame_data==NULL)return;
2114   }
2115   cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
2116   /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
2117   data=cairo_image_surface_get_data(cs);
2118   if(data==NULL){
2119     cairo_surface_destroy(cs);
2120     return;
2121   }
2122   cstride=cairo_image_surface_get_stride(cs);
2123   y_row=_ycbcr[0].data;
2124   u_row=_ycbcr[1].data;
2125   v_row=_ycbcr[2].data;
2126   rgb_row=data;
2127   for(y=0;y<h;y++){
2128     for(x=0;x<w;x++){
2129       int r;
2130       int g;
2131       int b;
2132       r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
2133       g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
2134        -2672387*v_row[x>>hdec]+447306710)/3287200;
2135       b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
2136       rgb_row[4*x+0]=OC_CLAMP255(b);
2137       rgb_row[4*x+1]=OC_CLAMP255(g);
2138       rgb_row[4*x+2]=OC_CLAMP255(r);
2139     }
2140     y_row+=_ycbcr[0].stride;
2141     u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
2142     v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
2143     rgb_row+=cstride;
2144   }
2145   /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
2146   {
2147     cairo_t           *c;
2148     const oc_fragment *frags;
2149     oc_mv             *frag_mvs;
2150     const signed char *mb_modes;
2151     oc_mb_map         *mb_maps;
2152     size_t             nmbs;
2153     size_t             mbi;
2154     int                row2;
2155     int                col2;
2156     int                qim[3]={0,0,0};
2157     if(_dec->state.nqis==2){
2158       int bqi;
2159       bqi=_dec->state.qis[0];
2160       if(_dec->state.qis[1]>bqi)qim[1]=1;
2161       if(_dec->state.qis[1]<bqi)qim[1]=-1;
2162     }
2163     if(_dec->state.nqis==3){
2164       int bqi;
2165       int cqi;
2166       int dqi;
2167       bqi=_dec->state.qis[0];
2168       cqi=_dec->state.qis[1];
2169       dqi=_dec->state.qis[2];
2170       if(cqi>bqi&&dqi>bqi){
2171         if(dqi>cqi){
2172           qim[1]=1;
2173           qim[2]=2;
2174         }
2175         else{
2176           qim[1]=2;
2177           qim[2]=1;
2178         }
2179       }
2180       else if(cqi<bqi&&dqi<bqi){
2181         if(dqi<cqi){
2182           qim[1]=-1;
2183           qim[2]=-2;
2184         }
2185         else{
2186           qim[1]=-2;
2187           qim[2]=-1;
2188         }
2189       }
2190       else{
2191         if(cqi<bqi)qim[1]=-1;
2192         else qim[1]=1;
2193         if(dqi<bqi)qim[2]=-1;
2194         else qim[2]=1;
2195       }
2196     }
2197     c=cairo_create(cs);
2198     frags=_dec->state.frags;
2199     frag_mvs=_dec->state.frag_mvs;
2200     mb_modes=_dec->state.mb_modes;
2201     mb_maps=_dec->state.mb_maps;
2202     nmbs=_dec->state.nmbs;
2203     row2=0;
2204     col2=0;
2205     for(mbi=0;mbi<nmbs;mbi++){
2206       float x;
2207       float y;
2208       int   bi;
2209       y=h-(row2+((col2+1>>1)&1))*16-16;
2210       x=(col2>>1)*16;
2211       cairo_set_line_width(c,1.);
2212       /*Keyframe (all intra) red box.*/
2213       if(_dec->state.frame_type==OC_INTRA_FRAME){
2214         if(_dec->telemetry_mbmode&0x02){
2215           cairo_set_source_rgba(c,1.,0,0,.5);
2216           cairo_rectangle(c,x+2.5,y+2.5,11,11);
2217           cairo_stroke_preserve(c);
2218           cairo_set_source_rgba(c,1.,0,0,.25);
2219           cairo_fill(c);
2220         }
2221       }
2222       else{
2223         ptrdiff_t fragi;
2224         int       frag_mvx;
2225         int       frag_mvy;
2226         for(bi=0;bi<4;bi++){
2227           fragi=mb_maps[mbi][0][bi];
2228           if(fragi>=0&&frags[fragi].coded){
2229             frag_mvx=OC_MV_X(frag_mvs[fragi]);
2230             frag_mvy=OC_MV_Y(frag_mvs[fragi]);
2231             break;
2232           }
2233         }
2234         if(bi<4){
2235           switch(mb_modes[mbi]){
2236             case OC_MODE_INTRA:{
2237               if(_dec->telemetry_mbmode&0x02){
2238                 cairo_set_source_rgba(c,1.,0,0,.5);
2239                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2240                 cairo_stroke_preserve(c);
2241                 cairo_set_source_rgba(c,1.,0,0,.25);
2242                 cairo_fill(c);
2243               }
2244             }break;
2245             case OC_MODE_INTER_NOMV:{
2246               if(_dec->telemetry_mbmode&0x01){
2247                 cairo_set_source_rgba(c,0,0,1.,.5);
2248                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2249                 cairo_stroke_preserve(c);
2250                 cairo_set_source_rgba(c,0,0,1.,.25);
2251                 cairo_fill(c);
2252               }
2253             }break;
2254             case OC_MODE_INTER_MV:{
2255               if(_dec->telemetry_mbmode&0x04){
2256                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2257                 cairo_set_source_rgba(c,0,1.,0,.5);
2258                 cairo_stroke(c);
2259               }
2260               if(_dec->telemetry_mv&0x04){
2261                 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2262                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2263                 cairo_set_line_width(c,3.);
2264                 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2265                 cairo_stroke_preserve(c);
2266                 cairo_set_line_width(c,2.);
2267                 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2268                 cairo_stroke_preserve(c);
2269                 cairo_set_line_width(c,1.);
2270                 cairo_line_to(c,x+8,y+8);
2271                 cairo_stroke(c);
2272               }
2273             }break;
2274             case OC_MODE_INTER_MV_LAST:{
2275               if(_dec->telemetry_mbmode&0x08){
2276                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2277                 cairo_set_source_rgba(c,0,1.,0,.5);
2278                 cairo_move_to(c,x+13.5,y+2.5);
2279                 cairo_line_to(c,x+2.5,y+8);
2280                 cairo_line_to(c,x+13.5,y+13.5);
2281                 cairo_stroke(c);
2282               }
2283               if(_dec->telemetry_mv&0x08){
2284                 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2285                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2286                 cairo_set_line_width(c,3.);
2287                 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2288                 cairo_stroke_preserve(c);
2289                 cairo_set_line_width(c,2.);
2290                 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2291                 cairo_stroke_preserve(c);
2292                 cairo_set_line_width(c,1.);
2293                 cairo_line_to(c,x+8,y+8);
2294                 cairo_stroke(c);
2295               }
2296             }break;
2297             case OC_MODE_INTER_MV_LAST2:{
2298               if(_dec->telemetry_mbmode&0x10){
2299                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2300                 cairo_set_source_rgba(c,0,1.,0,.5);
2301                 cairo_move_to(c,x+8,y+2.5);
2302                 cairo_line_to(c,x+2.5,y+8);
2303                 cairo_line_to(c,x+8,y+13.5);
2304                 cairo_move_to(c,x+13.5,y+2.5);
2305                 cairo_line_to(c,x+8,y+8);
2306                 cairo_line_to(c,x+13.5,y+13.5);
2307                 cairo_stroke(c);
2308               }
2309               if(_dec->telemetry_mv&0x10){
2310                 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2311                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2312                 cairo_set_line_width(c,3.);
2313                 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2314                 cairo_stroke_preserve(c);
2315                 cairo_set_line_width(c,2.);
2316                 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2317                 cairo_stroke_preserve(c);
2318                 cairo_set_line_width(c,1.);
2319                 cairo_line_to(c,x+8,y+8);
2320                 cairo_stroke(c);
2321               }
2322             }break;
2323             case OC_MODE_GOLDEN_NOMV:{
2324               if(_dec->telemetry_mbmode&0x20){
2325                 cairo_set_source_rgba(c,1.,1.,0,.5);
2326                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2327                 cairo_stroke_preserve(c);
2328                 cairo_set_source_rgba(c,1.,1.,0,.25);
2329                 cairo_fill(c);
2330               }
2331             }break;
2332             case OC_MODE_GOLDEN_MV:{
2333               if(_dec->telemetry_mbmode&0x40){
2334                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2335                 cairo_set_source_rgba(c,1.,1.,0,.5);
2336                 cairo_stroke(c);
2337               }
2338               if(_dec->telemetry_mv&0x40){
2339                 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2340                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2341                 cairo_set_line_width(c,3.);
2342                 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2343                 cairo_stroke_preserve(c);
2344                 cairo_set_line_width(c,2.);
2345                 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2346                 cairo_stroke_preserve(c);
2347                 cairo_set_line_width(c,1.);
2348                 cairo_line_to(c,x+8,y+8);
2349                 cairo_stroke(c);
2350               }
2351             }break;
2352             case OC_MODE_INTER_MV_FOUR:{
2353               if(_dec->telemetry_mbmode&0x80){
2354                 cairo_rectangle(c,x+2.5,y+2.5,4,4);
2355                 cairo_rectangle(c,x+9.5,y+2.5,4,4);
2356                 cairo_rectangle(c,x+2.5,y+9.5,4,4);
2357                 cairo_rectangle(c,x+9.5,y+9.5,4,4);
2358                 cairo_set_source_rgba(c,0,1.,0,.5);
2359                 cairo_stroke(c);
2360               }
2361               /*4mv is odd, coded in raster order.*/
2362               fragi=mb_maps[mbi][0][0];
2363               if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2364                 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2365                 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2366                 cairo_move_to(c,x+4+frag_mvx,y+12-frag_mvy);
2367                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2368                 cairo_set_line_width(c,3.);
2369                 cairo_line_to(c,x+4+frag_mvx*.66,y+12-frag_mvy*.66);
2370                 cairo_stroke_preserve(c);
2371                 cairo_set_line_width(c,2.);
2372                 cairo_line_to(c,x+4+frag_mvx*.33,y+12-frag_mvy*.33);
2373                 cairo_stroke_preserve(c);
2374                 cairo_set_line_width(c,1.);
2375                 cairo_line_to(c,x+4,y+12);
2376                 cairo_stroke(c);
2377               }
2378               fragi=mb_maps[mbi][0][1];
2379               if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2380                 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2381                 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2382                 cairo_move_to(c,x+12+frag_mvx,y+12-frag_mvy);
2383                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2384                 cairo_set_line_width(c,3.);
2385                 cairo_line_to(c,x+12+frag_mvx*.66,y+12-frag_mvy*.66);
2386                 cairo_stroke_preserve(c);
2387                 cairo_set_line_width(c,2.);
2388                 cairo_line_to(c,x+12+frag_mvx*.33,y+12-frag_mvy*.33);
2389                 cairo_stroke_preserve(c);
2390                 cairo_set_line_width(c,1.);
2391                 cairo_line_to(c,x+12,y+12);
2392                 cairo_stroke(c);
2393               }
2394               fragi=mb_maps[mbi][0][2];
2395               if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2396                 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2397                 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2398                 cairo_move_to(c,x+4+frag_mvx,y+4-frag_mvy);
2399                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2400                 cairo_set_line_width(c,3.);
2401                 cairo_line_to(c,x+4+frag_mvx*.66,y+4-frag_mvy*.66);
2402                 cairo_stroke_preserve(c);
2403                 cairo_set_line_width(c,2.);
2404                 cairo_line_to(c,x+4+frag_mvx*.33,y+4-frag_mvy*.33);
2405                 cairo_stroke_preserve(c);
2406                 cairo_set_line_width(c,1.);
2407                 cairo_line_to(c,x+4,y+4);
2408                 cairo_stroke(c);
2409               }
2410               fragi=mb_maps[mbi][0][3];
2411               if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2412                 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2413                 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2414                 cairo_move_to(c,x+12+frag_mvx,y+4-frag_mvy);
2415                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2416                 cairo_set_line_width(c,3.);
2417                 cairo_line_to(c,x+12+frag_mvx*.66,y+4-frag_mvy*.66);
2418                 cairo_stroke_preserve(c);
2419                 cairo_set_line_width(c,2.);
2420                 cairo_line_to(c,x+12+frag_mvx*.33,y+4-frag_mvy*.33);
2421                 cairo_stroke_preserve(c);
2422                 cairo_set_line_width(c,1.);
2423                 cairo_line_to(c,x+12,y+4);
2424                 cairo_stroke(c);
2425               }
2426             }break;
2427           }
2428         }
2429       }
2430       /*qii illustration.*/
2431       if(_dec->telemetry_qi&0x2){
2432         cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
2433         for(bi=0;bi<4;bi++){
2434           ptrdiff_t fragi;
2435           int       qiv;
2436           int       xp;
2437           int       yp;
2438           xp=x+(bi&1)*8;
2439           yp=y+8-(bi&2)*4;
2440           fragi=mb_maps[mbi][0][bi];
2441           if(fragi>=0&&frags[fragi].coded){
2442             qiv=qim[frags[fragi].qii];
2443             cairo_set_line_width(c,3.);
2444             cairo_set_source_rgba(c,0.,0.,0.,.5);
2445             switch(qiv){
2446               /*Double plus:*/
2447               case 2:{
2448                 if((bi&1)^((bi&2)>>1)){
2449                   cairo_move_to(c,xp+2.5,yp+1.5);
2450                   cairo_line_to(c,xp+2.5,yp+3.5);
2451                   cairo_move_to(c,xp+1.5,yp+2.5);
2452                   cairo_line_to(c,xp+3.5,yp+2.5);
2453                   cairo_move_to(c,xp+5.5,yp+4.5);
2454                   cairo_line_to(c,xp+5.5,yp+6.5);
2455                   cairo_move_to(c,xp+4.5,yp+5.5);
2456                   cairo_line_to(c,xp+6.5,yp+5.5);
2457                   cairo_stroke_preserve(c);
2458                   cairo_set_source_rgba(c,0.,1.,1.,1.);
2459                 }
2460                 else{
2461                   cairo_move_to(c,xp+5.5,yp+1.5);
2462                   cairo_line_to(c,xp+5.5,yp+3.5);
2463                   cairo_move_to(c,xp+4.5,yp+2.5);
2464                   cairo_line_to(c,xp+6.5,yp+2.5);
2465                   cairo_move_to(c,xp+2.5,yp+4.5);
2466                   cairo_line_to(c,xp+2.5,yp+6.5);
2467                   cairo_move_to(c,xp+1.5,yp+5.5);
2468                   cairo_line_to(c,xp+3.5,yp+5.5);
2469                   cairo_stroke_preserve(c);
2470                   cairo_set_source_rgba(c,0.,1.,1.,1.);
2471                 }
2472               }break;
2473               /*Double minus:*/
2474               case -2:{
2475                 cairo_move_to(c,xp+2.5,yp+2.5);
2476                 cairo_line_to(c,xp+5.5,yp+2.5);
2477                 cairo_move_to(c,xp+2.5,yp+5.5);
2478                 cairo_line_to(c,xp+5.5,yp+5.5);
2479                 cairo_stroke_preserve(c);
2480                 cairo_set_source_rgba(c,1.,1.,1.,1.);
2481               }break;
2482               /*Plus:*/
2483               case 1:{
2484                 if((bi&2)==0)yp-=2;
2485                 if((bi&1)==0)xp-=2;
2486                 cairo_move_to(c,xp+4.5,yp+2.5);
2487                 cairo_line_to(c,xp+4.5,yp+6.5);
2488                 cairo_move_to(c,xp+2.5,yp+4.5);
2489                 cairo_line_to(c,xp+6.5,yp+4.5);
2490                 cairo_stroke_preserve(c);
2491                 cairo_set_source_rgba(c,.1,1.,.3,1.);
2492                 break;
2493               }
2494               /*Fall through.*/
2495               /*Minus:*/
2496               case -1:{
2497                 cairo_move_to(c,xp+2.5,yp+4.5);
2498                 cairo_line_to(c,xp+6.5,yp+4.5);
2499                 cairo_stroke_preserve(c);
2500                 cairo_set_source_rgba(c,1.,.3,.1,1.);
2501               }break;
2502               default:continue;
2503             }
2504             cairo_set_line_width(c,1.);
2505             cairo_stroke(c);
2506           }
2507         }
2508       }
2509       col2++;
2510       if((col2>>1)>=_dec->state.nhmbs){
2511         col2=0;
2512         row2+=2;
2513       }
2514     }
2515     /*Bit usage indicator[s]:*/
2516     if(_dec->telemetry_bits){
2517       int widths[6];
2518       int fpsn;
2519       int fpsd;
2520       int mult;
2521       int fullw;
2522       int padw;
2523       int i;
2524       fpsn=_dec->state.info.fps_numerator;
2525       fpsd=_dec->state.info.fps_denominator;
2526       mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
2527       fullw=250.f*h*fpsd*mult/fpsn;
2528       padw=w-24;
2529       /*Header and coded block bits.*/
2530       if(_dec->telemetry_frame_bytes<0||
2531        _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
2532         _dec->telemetry_frame_bytes=0;
2533       }
2534       if(_dec->telemetry_coding_bytes<0||
2535        _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
2536         _dec->telemetry_coding_bytes=0;
2537       }
2538       if(_dec->telemetry_mode_bytes<0||
2539        _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
2540         _dec->telemetry_mode_bytes=0;
2541       }
2542       if(_dec->telemetry_mv_bytes<0||
2543        _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
2544         _dec->telemetry_mv_bytes=0;
2545       }
2546       if(_dec->telemetry_qi_bytes<0||
2547        _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
2548         _dec->telemetry_qi_bytes=0;
2549       }
2550       if(_dec->telemetry_dc_bytes<0||
2551        _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
2552         _dec->telemetry_dc_bytes=0;
2553       }
2554       widths[0]=padw*
2555        (_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
2556       widths[1]=padw*
2557        (_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
2558       widths[2]=padw*
2559        (_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
2560       widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
2561       widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
2562       widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
2563       for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
2564       cairo_set_source_rgba(c,.0,.0,.0,.6);
2565       cairo_rectangle(c,10,h-33,widths[0]+1,5);
2566       cairo_rectangle(c,10,h-29,widths[1]+1,5);
2567       cairo_rectangle(c,10,h-25,widths[2]+1,5);
2568       cairo_rectangle(c,10,h-21,widths[3]+1,5);
2569       cairo_rectangle(c,10,h-17,widths[4]+1,5);
2570       cairo_rectangle(c,10,h-13,widths[5]+1,5);
2571       cairo_fill(c);
2572       cairo_set_source_rgb(c,1,0,0);
2573       cairo_rectangle(c,10.5,h-32.5,widths[0],4);
2574       cairo_fill(c);
2575       cairo_set_source_rgb(c,0,1,0);
2576       cairo_rectangle(c,10.5,h-28.5,widths[1],4);
2577       cairo_fill(c);
2578       cairo_set_source_rgb(c,0,0,1);
2579       cairo_rectangle(c,10.5,h-24.5,widths[2],4);
2580       cairo_fill(c);
2581       cairo_set_source_rgb(c,.6,.4,.0);
2582       cairo_rectangle(c,10.5,h-20.5,widths[3],4);
2583       cairo_fill(c);
2584       cairo_set_source_rgb(c,.3,.3,.3);
2585       cairo_rectangle(c,10.5,h-16.5,widths[4],4);
2586       cairo_fill(c);
2587       cairo_set_source_rgb(c,.5,.5,.8);
2588       cairo_rectangle(c,10.5,h-12.5,widths[5],4);
2589       cairo_fill(c);
2590     }
2591     /*Master qi indicator[s]:*/
2592     if(_dec->telemetry_qi&0x1){
2593       cairo_text_extents_t extents;
2594       char                 buffer[10];
2595       int                  p;
2596       int                  y;
2597       p=0;
2598       y=h-7.5;
2599       if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
2600       buffer[p++]=48+_dec->state.qis[0]%10;
2601       if(_dec->state.nqis>=2){
2602         buffer[p++]=' ';
2603         if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
2604         buffer[p++]=48+_dec->state.qis[1]%10;
2605       }
2606       if(_dec->state.nqis==3){
2607         buffer[p++]=' ';
2608         if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
2609         buffer[p++]=48+_dec->state.qis[2]%10;
2610       }
2611       buffer[p++]='\0';
2612       cairo_select_font_face(c,"sans",
2613        CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
2614       cairo_set_font_size(c,18);
2615       cairo_text_extents(c,buffer,&extents);
2616       cairo_set_source_rgb(c,1,1,1);
2617       cairo_move_to(c,w-extents.x_advance-10,y);
2618       cairo_show_text(c,buffer);
2619       cairo_set_source_rgb(c,0,0,0);
2620       cairo_move_to(c,w-extents.x_advance-10,y);
2621       cairo_text_path(c,buffer);
2622       cairo_set_line_width(c,.8);
2623       cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
2624       cairo_stroke(c);
2625     }
2626     cairo_destroy(c);
2627   }
2628   /*Out of the Cairo plane into the telemetry YUV buffer.*/
2629   _ycbcr[0].data=_dec->telemetry_frame_data;
2630   _ycbcr[0].stride=_ycbcr[0].width;
2631   _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
2632   _ycbcr[1].stride=_ycbcr[1].width;
2633   _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
2634   _ycbcr[2].stride=_ycbcr[2].width;
2635   y_row=_ycbcr[0].data;
2636   u_row=_ycbcr[1].data;
2637   v_row=_ycbcr[2].data;
2638   rgb_row=data;
2639   /*This is one of the few places it's worth handling chroma on a
2640      case-by-case basis.*/
2641   switch(_dec->state.info.pixel_fmt){
2642     case TH_PF_420:{
2643       for(y=0;y<h;y+=2){
2644         unsigned char *y_row2;
2645         unsigned char *rgb_row2;
2646         y_row2=y_row+_ycbcr[0].stride;
2647         rgb_row2=rgb_row+cstride;
2648         for(x=0;x<w;x+=2){
2649           int y;
2650           int u;
2651           int v;
2652           y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2653            +24966*rgb_row[4*x+0]+4207500)/255000;
2654           y_row[x]=OC_CLAMP255(y);
2655           y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2656            +24966*rgb_row[4*x+4]+4207500)/255000;
2657           y_row[x+1]=OC_CLAMP255(y);
2658           y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
2659            +24966*rgb_row2[4*x+0]+4207500)/255000;
2660           y_row2[x]=OC_CLAMP255(y);
2661           y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
2662            +24966*rgb_row2[4*x+4]+4207500)/255000;
2663           y_row2[x+1]=OC_CLAMP255(y);
2664           u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
2665            +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2666            -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
2667            +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2668            +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
2669            +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
2670           v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
2671            +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2672            -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
2673             +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2674            -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
2675             +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
2676           u_row[x>>1]=OC_CLAMP255(u);
2677           v_row[x>>1]=OC_CLAMP255(v);
2678         }
2679         y_row+=_ycbcr[0].stride<<1;
2680         u_row+=_ycbcr[1].stride;
2681         v_row+=_ycbcr[2].stride;
2682         rgb_row+=cstride<<1;
2683       }
2684     }break;
2685     case TH_PF_422:{
2686       for(y=0;y<h;y++){
2687         for(x=0;x<w;x+=2){
2688           int y;
2689           int u;
2690           int v;
2691           y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2692            +24966*rgb_row[4*x+0]+4207500)/255000;
2693           y_row[x]=OC_CLAMP255(y);
2694           y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2695            +24966*rgb_row[4*x+4]+4207500)/255000;
2696           y_row[x+1]=OC_CLAMP255(y);
2697           u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
2698            -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
2699            +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
2700           v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
2701            -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
2702            -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
2703           u_row[x>>1]=OC_CLAMP255(u);
2704           v_row[x>>1]=OC_CLAMP255(v);
2705         }
2706         y_row+=_ycbcr[0].stride;
2707         u_row+=_ycbcr[1].stride;
2708         v_row+=_ycbcr[2].stride;
2709         rgb_row+=cstride;
2710       }
2711     }break;
2712     /*case TH_PF_444:*/
2713     default:{
2714       for(y=0;y<h;y++){
2715         for(x=0;x<w;x++){
2716           int y;
2717           int u;
2718           int v;
2719           y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2720            +24966*rgb_row[4*x+0]+4207500)/255000;
2721           u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
2722            +99232*rgb_row[4*x+0]+29032005)/225930;
2723           v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
2724            -25536*rgb_row[4*x+0]+45940035)/357510;
2725           y_row[x]=OC_CLAMP255(y);
2726           u_row[x]=OC_CLAMP255(u);
2727           v_row[x]=OC_CLAMP255(v);
2728         }
2729         y_row+=_ycbcr[0].stride;
2730         u_row+=_ycbcr[1].stride;
2731         v_row+=_ycbcr[2].stride;
2732         rgb_row+=cstride;
2733       }
2734     }break;
2735   }
2736   /*Finished.
2737     Destroy the surface.*/
2738   cairo_surface_destroy(cs);
2739 }
2740 #endif
2741
2742 int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
2743  ogg_int64_t *_granpos){
2744   int ret;
2745   if(_dec==NULL||_op==NULL)return TH_EFAULT;
2746   /*A completely empty packet indicates a dropped frame and is treated exactly
2747      like an inter frame with no coded blocks.*/
2748   if(_op->bytes==0){
2749     _dec->state.frame_type=OC_INTER_FRAME;
2750     _dec->state.ntotal_coded_fragis=0;
2751   }
2752   else{
2753     oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
2754     ret=oc_dec_frame_header_unpack(_dec);
2755     if(ret<0)return ret;
2756     if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
2757     else oc_dec_coded_flags_unpack(_dec);
2758   }
2759   /*If there have been no reference frames, and we need one, initialize one.*/
2760   if(_dec->state.frame_type!=OC_INTRA_FRAME&&
2761    (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
2762    _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
2763     oc_dec_init_dummy_frame(_dec);
2764   }
2765   /*If this was an inter frame with no coded blocks...*/
2766   if(_dec->state.ntotal_coded_fragis<=0){
2767     /*Just update the granule position and return.*/
2768     _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
2769      _dec->state.info.keyframe_granule_shift)
2770      +(_dec->state.curframe_num-_dec->state.keyframe_num);
2771     _dec->state.curframe_num++;
2772     if(_granpos!=NULL)*_granpos=_dec->state.granpos;
2773     return TH_DUPFRAME;
2774   }
2775   else{
2776     th_ycbcr_buffer stripe_buf;
2777     int             stripe_fragy;
2778     int             refi;
2779     int             pli;
2780     int             notstart;
2781     int             notdone;
2782 #ifdef HAVE_CAIRO
2783     int             telemetry;
2784     /*Save the current telemetry state.
2785       This prevents it from being modified in the middle of decoding this
2786        frame, which could cause us to skip calls to the striped decoding
2787        callback.*/
2788     telemetry=_dec->telemetry;
2789 #endif
2790     /*Select a free buffer to use for the reconstructed version of this frame.*/
2791     for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
2792      refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
2793     _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
2794     _dec->state.ref_frame_data[OC_FRAME_SELF]=
2795      _dec->state.ref_frame_bufs[refi][0].data;
2796 #if defined(HAVE_CAIRO)
2797     _dec->telemetry_frame_bytes=_op->bytes;
2798 #endif
2799     if(_dec->state.frame_type==OC_INTRA_FRAME){
2800       _dec->state.keyframe_num=_dec->state.curframe_num;
2801 #if defined(HAVE_CAIRO)
2802       _dec->telemetry_coding_bytes=
2803        _dec->telemetry_mode_bytes=
2804        _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
2805 #endif
2806     }
2807     else{
2808 #if defined(HAVE_CAIRO)
2809       _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
2810 #endif
2811       oc_dec_mb_modes_unpack(_dec);
2812 #if defined(HAVE_CAIRO)
2813       _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
2814 #endif
2815       oc_dec_mv_unpack_and_frag_modes_fill(_dec);
2816 #if defined(HAVE_CAIRO)
2817       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
2818 #endif
2819     }
2820     oc_dec_block_qis_unpack(_dec);
2821 #if defined(HAVE_CAIRO)
2822     _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
2823 #endif
2824     oc_dec_residual_tokens_unpack(_dec);
2825     /*Update granule position.
2826       This must be done before the striped decode callbacks so that the
2827        application knows what to do with the frame data.*/
2828     _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
2829      _dec->state.info.keyframe_granule_shift)
2830      +(_dec->state.curframe_num-_dec->state.keyframe_num);
2831     _dec->state.curframe_num++;
2832     if(_granpos!=NULL)*_granpos=_dec->state.granpos;
2833     /*All of the rest of the operations -- DC prediction reversal,
2834        reconstructing coded fragments, copying uncoded fragments, loop
2835        filtering, extending borders, and out-of-loop post-processing -- should
2836        be pipelined.
2837       I.e., DC prediction reversal, reconstruction, and uncoded fragment
2838        copying are done for one or two super block rows, then loop filtering is
2839        run as far as it can, then bordering copying, then post-processing.
2840       For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
2841        block rows, and one chroma.
2842       Otherwise, an MCU consists of one super block row from each plane.
2843       Inside each MCU, we perform all of the steps on one color plane before
2844        moving on to the next.
2845       After reconstruction, the additional filtering stages introduce a delay
2846        since they need some pixels from the next fragment row.
2847       Thus the actual number of decoded rows available is slightly smaller for
2848        the first MCU, and slightly larger for the last.
2849
2850       This entire process allows us to operate on the data while it is still in
2851        cache, resulting in big performance improvements.
2852       An application callback allows further application processing (blitting
2853        to video memory, color conversion, etc.) to also use the data while it's
2854        in cache.*/
2855     oc_dec_pipeline_init(_dec,&_dec->pipe);
2856     oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
2857     notstart=0;
2858     notdone=1;
2859     for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
2860       int avail_fragy0;
2861       int avail_fragy_end;
2862       avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
2863       notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
2864       for(pli=0;pli<3;pli++){
2865         oc_fragment_plane *fplane;
2866         int                frag_shift;
2867         int                pp_offset;
2868         int                sdelay;
2869         int                edelay;
2870         fplane=_dec->state.fplanes+pli;
2871         /*Compute the first and last fragment row of the current MCU for this
2872            plane.*/
2873         frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
2874         _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
2875         _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
2876          _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
2877         oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
2878         oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
2879         sdelay=edelay=0;
2880         if(_dec->pipe.loop_filter){
2881           sdelay+=notstart;
2882           edelay+=notdone;
2883           oc_state_loop_filter_frag_rows(&_dec->state,
2884            _dec->pipe.bounding_values,OC_FRAME_SELF,pli,
2885            _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2886         }
2887         /*To fill the borders, we have an additional two pixel delay, since a
2888            fragment in the next row could filter its top edge, using two pixels
2889            from a fragment in this row.
2890           But there's no reason to delay a full fragment between the two.*/
2891         oc_state_borders_fill_rows(&_dec->state,refi,pli,
2892          (_dec->pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
2893          (_dec->pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
2894         /*Out-of-loop post-processing.*/
2895         pp_offset=3*(pli!=0);
2896         if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
2897           /*Perform de-blocking in one plane.*/
2898           sdelay+=notstart;
2899           edelay+=notdone;
2900           oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
2901            _dec->state.ref_frame_bufs[refi],pli,
2902            _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2903           if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
2904             /*Perform de-ringing in one plane.*/
2905             sdelay+=notstart;
2906             edelay+=notdone;
2907             oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
2908              _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2909           }
2910         }
2911         /*If no post-processing is done, we still need to delay a row for the
2912            loop filter, thanks to the strange filtering order VP3 chose.*/
2913         else if(_dec->pipe.loop_filter){
2914           sdelay+=notstart;
2915           edelay+=notdone;
2916         }
2917         /*Compute the intersection of the available rows in all planes.
2918           If chroma is sub-sampled, the effect of each of its delays is
2919            doubled, but luma might have more post-processing filters enabled
2920            than chroma, so we don't know up front which one is the limiting
2921            factor.*/
2922         avail_fragy0=OC_MINI(avail_fragy0,
2923          _dec->pipe.fragy0[pli]-sdelay<<frag_shift);
2924         avail_fragy_end=OC_MINI(avail_fragy_end,
2925          _dec->pipe.fragy_end[pli]-edelay<<frag_shift);
2926       }
2927 #ifdef HAVE_CAIRO
2928       if(_dec->stripe_cb.stripe_decoded!=NULL&&!telemetry){
2929 #else
2930       if(_dec->stripe_cb.stripe_decoded!=NULL){
2931 #endif
2932         /*The callback might want to use the FPU, so let's make sure they can.
2933           We violate all kinds of ABI restrictions by not doing this until
2934            now, but none of them actually matter since we don't use floating
2935            point ourselves.*/
2936         oc_restore_fpu(&_dec->state);
2937         /*Make the callback, ensuring we flip the sense of the "start" and
2938            "end" of the available region upside down.*/
2939         (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
2940          _dec->state.fplanes[0].nvfrags-avail_fragy_end,
2941          _dec->state.fplanes[0].nvfrags-avail_fragy0);
2942       }
2943       notstart=1;
2944     }
2945     /*Finish filling in the reference frame borders.*/
2946     for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
2947     /*Update the reference frame indices.*/
2948     if(_dec->state.frame_type==OC_INTRA_FRAME){
2949       /*The new frame becomes both the previous and gold reference frames.*/
2950       _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
2951        _dec->state.ref_frame_idx[OC_FRAME_PREV]=
2952        _dec->state.ref_frame_idx[OC_FRAME_SELF];
2953       _dec->state.ref_frame_data[OC_FRAME_GOLD]=
2954        _dec->state.ref_frame_data[OC_FRAME_PREV]=
2955        _dec->state.ref_frame_data[OC_FRAME_SELF];
2956     }
2957     else{
2958       /*Otherwise, just replace the previous reference frame.*/
2959       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
2960        _dec->state.ref_frame_idx[OC_FRAME_SELF];
2961       _dec->state.ref_frame_data[OC_FRAME_PREV]=
2962        _dec->state.ref_frame_data[OC_FRAME_SELF];
2963     }
2964     /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
2965        gamma values, if nothing else).*/
2966     oc_restore_fpu(&_dec->state);
2967 #ifdef HAVE_CAIRO
2968     /*If telemetry ioctls are active, we need to draw to the output buffer.*/
2969     if(telemetry){
2970       oc_render_telemetry(_dec,stripe_buf,telemetry);
2971       oc_ycbcr_buffer_flip(_dec->pp_frame_buf,stripe_buf);
2972       /*If we had a striped decoding callback, we skipped calling it above
2973          (because the telemetry wasn't rendered yet).
2974         Call it now with the whole frame.*/
2975       if(_dec->stripe_cb.stripe_decoded!=NULL){
2976         (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,
2977          stripe_buf,0,_dec->state.fplanes[0].nvfrags);
2978       }
2979     }
2980 #endif
2981 #if defined(OC_DUMP_IMAGES)
2982     /*We only dump images if there were some coded blocks.*/
2983     oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
2984 #endif
2985     return 0;
2986   }
2987 }
2988
2989 int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
2990   if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
2991   oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
2992   return 0;
2993 }