Get rid of some dead code.
author Timothy B. Terriberry <tterribe@xiph.org>
Sun, 26 May 2013 23:41:05 +0000 (16:41 -0700)
committer Timothy B. Terriberry <tterribe@xiph.org>
Sun, 26 May 2013 23:41:05 +0000 (16:41 -0700)
src/mc-normal_blend.c [deleted file]
src/mc-re.c [deleted file]
src/mcenc-level.c [deleted file]
src/mcenc-nolevel.c [deleted file]

diff --git a/src/mc-normal_blend.c b/src/mc-normal_blend.c
deleted file mode 100644 (file)
index 489003a..0000000
+++ /dev/null
@@ -1,1493 +0,0 @@
-/*Daala video codec
-Copyright (c) 2006-2010 Daala project contributors.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
-
-#include <stddef.h>
-#include "internal.h"
-
-/*Interpolation along an edge by interpolating vectors.*/
-#define OD_MC_INTERP_VECTOR (0)
-/*Interpolation along an edge by blending samples from different vectors.*/
-#define OD_MC_INTERP_BLEND  (1)
-
-#define _MCInterpType(_a,_b,_c,_d) ((_a)|((_b)<<1)|((_c)<<2)|((_d)<<3))
-
-#define OD_MC_INTERP_VVVV (0)
-#define OD_MC_INTERP_BVVV (1)
-#define OD_MC_INTERP_VBVV (2)
-#define OD_MC_INTERP_BBVV (3)
-#define OD_MC_INTERP_VVBV (4)
-#define OD_MC_INTERP_BVBV (5)
-#define OD_MC_INTERP_VBBV (6)
-#define OD_MC_INTERP_BBBV (7)
-#define OD_MC_INTERP_VVVB (8)
-#define OD_MC_INTERP_BVVB (9)
-#define OD_MC_INTERP_VBVB (10)
-#define OD_MC_INTERP_BBVB (11)
-#define OD_MC_INTERP_VVBB (12)
-#define OD_MC_INTERP_BVBB (13)
-#define OD_MC_INTERP_VBBB (14)
-#define OD_MC_INTERP_BBBB (15)
-
-/*
-  Indexing of vertices and edges:
-  0--0--1
-  |     |
-  3     1
-  |     |
-  3--2--2
-
-  Interpolation formulas:
-  i = horizontal position in the destination block, 0...blk_sz-1
-  j = vertical position  in the destination block, 0...blk_sz-1
-  blk_sz = length of an edge in the destination block in pixels
-  blk_sz2 = blk_sz*blk_sz
-  w[k]   = bilinear VECTOR weight of vertex k:
-  w[0]   = (blk_sz-i)*(blk_sz-j)/blk_sz2
-  w[1]   = i*(blk_sz-j)/blk_sz2
-  w[2]   = i*j/blk_sz2
-  w[3]   = (blk_sz-i)*j/blk_sz2
-  u[k]   = bilinear weight of edge k:
-  u[0]   = (blk_sz-j)/blk_sz
-  u[1]   = i/blk_sz
-  u[2]   = j/blk_sz
-  u[3]   = (blk_sz-i)/blk_sz
-  s[k]   = bilinear weight of vertex k:
-  m[k]   = motion vector k
-  s[k,c] = BLEND weight of vertex k in a split block whose outside corner is c:
-  s[c+0,c] = w[c]+w[c+1]/2+w[c+3]/2
-  s[c+1,c] = w[c+1]/2
-  s[c+2,c] = w[c+2]
-  s[c+3,c] = w[c+3]/2
-  src[m] = source image value of pixel (i,j) offset by the given motion
-   vector, bilinearly interpolated from the values at half-pel locations.
-   First, horizontal interpolation is done, and the result is truncated to
-   an integer, followed by vertical interpolation and truncation to an
-   integer.
-
-  Blending and interpolation functions are defined by the type of edges:
-  V: vector interpolation edge.
-     Along this edge, the motion vector is linearly interpolated from the
-      value at one vertex to the other.
-  B: blend edge
-     Along this edge, the predicted value is linearly interpolated between
-      that of the source image offset by the motion vector at one vertex
-      to the value of the source image offset by the motion vector at the
-      other vertex.
-  This allows for C^0 switching between overlapped block motion compensation
-   and smooth, per-pixel motion vector fields.
-  The various interpolation formulas below are chosen so that:
-   1) They have the proper form along each edge,
-   2) They still match each other when a block is split to add a new motion
-       vector in the middle (subject to the modifications to the blending
-       weights listed above).
-   3) All the vectors can be computed with simple bilinear interpolation,
-   4) All the resulting source image pixels can be blended with simple
-       bilinear interpolation.
-
-  There are 6 similar classes of edge configurations:
-
-  VVVV:
-     src[m[0]*w[0]+m[1]*w[1]+m[2]*w[2]+m[3]*w[3]]
-  BVVV (and rotations thereof):
-     src[u[0]*m[0]+w[2]*m[2]+w[3]*m[3]]*w[0]+
-      src[u[0]*m[1]+w[2]*m[2]+w[3]*m[3]]*w[1]+
-      src[m[0]*w[0]+m[1]*w[1]+m[2]*w[2]+m[3]*w[3]]*u[2]
-  BVBV (and rotation thereof):
-     src[u[0]*m[0]+u[3]*m[2]]*u[2]+
-      src[u[0]*m[1]+u[3]*m[3]]*u[1]
-  VVBB (and rotations thereof):
-     src[(w[0]+u[2])*m[0]+w[1]*m[1]]*w[0]+
-      src[w[0]*m[0]+w[1]*m[1]+w[2]*m[2]+w[3]*m[3]]*w[1]+
-      src[w[1]*m[1]+(w[2]+u[3])*m[2]]*w[2]+
-      src[m[3]]*w[3]
-  VBBB (and rotations thereof):
-     src[(u[3]+w[2])*m[0]+w[1]*m[1]]*w[0]+
-      src[w[0]*m[0]+(w[1]+u[2])*m[1]]*w[1]+
-      src[m[2]]*w[2]+src[m[3]]*w[3]
-  BBBB:
-     src[m[0]]*w[0]+src[m[1]]*w[1]+src[m[2]]*w[2]+src[m[3]]*w[3]
-
-  The remaining 10 edge configurations can be obtained by rotations of these
-   formulas.
-
-  Lots of indexing and finite differences where tiny errors may be made here.
-  This is also a good candidate for SIMD optimization.
-  Each case can be optimized separately.
-  The first and last are expected to be more common than the others.
-*/
-
-/*A table of indices used to set up the rotated versions of each vector
-   interpolation formula (see doc/mc-interpolation.nb for details).*/
-static const int MIDXS[][4]={
-  {0,1,2,3},/*0*/
-  {0,0,2,3},/*1*/
-  {1,1,2,3},/*2*/
-  {0,0,3,3},/*3*/
-  {1,1,2,2},/*4*/
-  {0,1,0,0},/*5*/
-  {2,1,2,2},/*6*/
-  {0,1,1,1},/*7*/
-};
-
-/*These are for finite differences for the edge and vertex weights.*/
-static const int SIDXS[][4]={
-  { 1, 0, 0, 0},/*0*//*w0[k]*/
-  {-1, 1, 0, 0},/*1*//*dwdi[k]*/
-  {-1, 0, 0, 1},/*2*//*dwdj[k]*/
-  { 1,-1, 1,-1},/*3*//*ddwdidj[k]*/
-  { 0, 0, 1, 1},/*4*//*u0[k+3&3]*/
-  { 1, 0,-1, 0},/*5*//*dudi[k+3&3*/
-  { 0, 1, 0,-1},/*6*//*dudj[k+3&3*/
-};
-
-/*Set up the finite differences needed to interpolate a motion vector
-   component.
-  _x0:         The initial value.
-  _dxdi:       The initial amount to increment per unit change in i.
-  _dxdj:       The amount to increment per unit change in j.
-  _ddxdidj:    The amount to increment _dxdi by per unit change in j.
-  _mvx:        The component value of the 4 motion vectors.
-  _m:          The index of the motion vector to use for each corner in the
-                base orientation.
-  _r:          The amount to rotate (clockwise) the formulas by (0...3).
-  _log_blk_sz: The log base 2 of the block size.*/
-static void od_mc_setup_mvc(ogg_int32_t *_x0,ogg_int32_t *_dxdi,
- ogg_int32_t *_dxdj,ogg_int32_t *_ddxdidj,const ogg_int32_t _mvx[4],
- const int _m[4],int _r,int _log_blk_sz){
-  int log_blk_sz2;
-  int k;
-  log_blk_sz2=_log_blk_sz<<1;
-  *_x0=_mvx[_m[0-_r&3]+_r&3];
-  *_dxdi=_mvx[_m[1-_r&3]+_r&3]-*_x0>>_log_blk_sz;
-  *_dxdj=_mvx[_m[3-_r&3]+_r&3]-*_x0>>_log_blk_sz;
-  *_ddxdidj=_mvx[_m[0-_r&3]+_r&3]+_mvx[_m[2-_r&3]+_r&3]-
-   _mvx[_m[1-_r&3]+_r&3]-_mvx[_m[3-_r&3]+_r&3]>>log_blk_sz2;
-}
-
-static void od_mc_setup_w(int _w0[4],int _dwdi[4],int _dwdj[4],
- int _ddwdidj[4],int _r,int _log_blk_sz){
-  int log_blk_sz2;
-  log_blk_sz2=_log_blk_sz<<1;
-  _w0[0-_r&3]=1<<log_blk_sz2;
-  _w0[1-_r&3]=_w0[2-_r&3]=_w0[3-_r&3]=0;
-  _dwdi[0-_r&3]=-1<<_log_blk_sz;
-  _dwdi[1-_r&3]=1<<_log_blk_sz;
-  _dwdi[2-_r&3]=_dwdi[3-_r&3]=0;
-  _dwdj[0-_r&3]=-1<<_log_blk_sz;
-  _dwdj[1-_r&3]=_dwdj[2-_r&3]=0;
-  _dwdj[3-_r&3]=1<<_log_blk_sz;
-  _ddwdidj[0-_r&3]=_ddwdidj[2-_r&3]=1;
-  _ddwdidj[1-_r&3]=_ddwdidj[3-_r&3]=-1;
-}
-
-static void od_mc_predict8(unsigned char *_dst,int _dystride,
- const unsigned char *_src,int _systride,const ogg_int32_t _mvx[4],
- const ogg_int32_t _mvy[4],int _interp_type,int _log_blk_sz){
-  int log_blk_sz2;
-  int blk_sz;
-  int r;
-  blk_sz=1<<_log_blk_sz;
-  log_blk_sz2=_log_blk_sz<<1;
-  r=0;
-  switch(_interp_type){
-    case OD_MC_INTERP_VVVV:{
-      ogg_int32_t x0;
-      ogg_int32_t y0;
-      ogg_int32_t x;
-      ogg_int32_t y;
-      ogg_int32_t dxdi;
-      ogg_int32_t dydi;
-      ogg_int32_t dxdj;
-      ogg_int32_t dydj;
-      ogg_int32_t ddxdidj;
-      ogg_int32_t ddydidj;
-      int         i;
-      int         j;
-      od_mc_setup_mvc(&x0,&dxdi,&dxdj,&ddxdidj,_mvx,
-       MIDXS[0]/*0,1,2,3*/,0,_log_blk_sz);
-      od_mc_setup_mvc(&y0,&dydi,&dydj,&ddydidj,_mvy,
-       MIDXS[0]/*0,1,2,3,*/,0,_log_blk_sz);
-      for(j=0;j<=blk_sz;j++){
-        unsigned char *dst;
-        x=x0;
-        y=y0;
-        dst=_dst;
-        for(i=0;i<=blk_sz;i++){
-          const unsigned char *p;
-          ogg_int32_t          xf;
-          ogg_int32_t          yf;
-          printf("<%8.4lf,%8.4lf>%s",x/(double)0x40000,y/(double)0x40000,
-           i<blk_sz?"::":"\n");
-          xf=x&0x3FFFF;
-          yf=y&0x3FFFF;
-          p=_src+(x>>18)+i+((y>>18)+j)*_systride;
-          dst[0]=(unsigned char)((p[0]*(0x40000-xf)+p[1]*xf>>18)*
-           (0x40000-yf)+(p[_systride]*(0x40000-xf)+
-           p[_systride+1]*xf>>18)*yf>>18);
-          x+=dxdi;
-          y+=dydi;
-          dst++;
-        }
-        x0+=dxdj;
-        y0+=dydj;
-        dxdi+=ddxdidj;
-        dydi+=ddydidj;
-        _dst+=_dystride;
-      }
-    }break;
-    case OD_MC_INTERP_VVVB:r++;
-    case OD_MC_INTERP_VVBV:r++;
-    case OD_MC_INTERP_VBVV:r++;
-    case OD_MC_INTERP_BVVV:{
-      ogg_int32_t x0[3];
-      ogg_int32_t y0[3];
-      ogg_int32_t x[3];
-      ogg_int32_t y[3];
-      ogg_int32_t dxdi[3];
-      ogg_int32_t dydi[3];
-      ogg_int32_t dxdj[3];
-      ogg_int32_t dydj[3];
-      ogg_int32_t ddxdidj[3];
-      ogg_int32_t ddydidj[3];
-      int         w0[4];
-      int         w[4];
-      int         dwdi[4];
-      int         i;
-      int         j;
-      int         k;
-      od_mc_setup_mvc(x0+0,dxdi+0,dxdj+0,ddxdidj+0,_mvx,
-       MIDXS[1]/*0,0,2,3*/,r,_log_blk_sz);
-      od_mc_setup_mvc(y0+0,dydi+0,dydj+0,ddydidj+0,_mvy,
-       MIDXS[1]/*0,0,2,3*/,r,_log_blk_sz);
-      od_mc_setup_mvc(x0+1,dxdi+1,dxdj+1,ddxdidj+1,_mvx,
-       MIDXS[2]/*1,1,2,3*/,r,_log_blk_sz);
-      od_mc_setup_mvc(y0+1,dydi+1,dydj+1,ddydidj+1,_mvy,
-       MIDXS[2]/*1,1,2,3*/,r,_log_blk_sz);
-      od_mc_setup_mvc(x0+2,dxdi+2,dxdj+2,ddxdidj+2,_mvx,
-       MIDXS[0]/*0,1,2,3*/,r,_log_blk_sz);
-      od_mc_setup_mvc(y0+2,dydi+2,dydj+2,ddydidj+2,_mvy,
-       MIDXS[0]/*0,1,2,3*/,r,_log_blk_sz);
-      w0[0-r&3]=1<<log_blk_sz2;
-      w0[1-r&3]=w0[2-r&3]=w0[3-r&3]=0;
-      dwdi[0-r&3]=-blk_sz;
-      dwdi[1-r&3]=blk_sz;
-      dwdi[2-r&3]=dwdi[3-r&3]=0;
-      for(k=0;k<3;k++){
-        x[k]=x0[k];
-        y[k]=y0[k];
-      }
-      for(j=0;j<=blk_sz;j++){
-        unsigned char *dst;
-        dst=_dst;
-        for(k=0;k<4;k++)w[k]=w0[k];
-        for(i=0;i<=blk_sz;i++){
-          ogg_int32_t c[3];
-          for(k=0;k<3;k++){
-            const unsigned char *p;
-            ogg_int32_t          xf;
-            ogg_int32_t          yf;
-            xf=x[k]&0x3FFFF;
-            yf=y[k]&0x3FFFF;
-            p=_src+(x[k]>>18)+i+((y[k]>>18)+j)*_systride;
-            c[k]=(p[0]*(0x40000-xf)+p[1]*xf>>18)*(0x40000-yf)+
-             (p[_systride]*(0x40000-xf)+p[_systride+1]*xf>>18)*yf>>18;
-            x[k]+=dxdi[k];
-            y[k]+=dydi[k];
-          }
-          printf("%3X<%8.4lf,%8.4lf>",w[0],
-           (x[0]-dxdi[0])/(double)0x40000,
-           (y[0]-dydi[0])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[1],
-           (x[1]-dxdi[1])/(double)0x40000,
-           (y[1]-dydi[1])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>%s",w[2]+w[3],
-           (x[2]-dxdi[2])/(double)0x40000,
-           (y[2]-dydi[2])/(double)0x40000,i<blk_sz?"::":"\n");
-          dst[0]=(unsigned char)(
-           c[0]*w[0]+c[1]*w[1]+c[2]*(w[2]+w[3])>>log_blk_sz2);
-          dst++;
-          for(k=0;k<4;k++)w[k]+=dwdi[k];
-        }
-        for(k=0;k<3;k++){
-          x0[k]+=dxdj[k];
-          y0[k]+=dydj[k];
-          dxdi[k]+=ddxdidj[k];
-          dydi[k]+=ddydidj[k];
-          x[k]=x0[k];
-          y[k]=y0[k];
-        }
-        w0[0-r&3]-=blk_sz;
-        w0[3-r&3]+=blk_sz;
-        dwdi[0-r&3]++;
-        dwdi[1-r&3]--;
-        dwdi[2-r&3]++;
-        dwdi[3-r&3]--;
-        _dst+=_dystride;
-      }
-    }break;
-    case OD_MC_INTERP_VBVB:r++;
-    case OD_MC_INTERP_BVBV:{
-      ogg_int32_t x0[2];
-      ogg_int32_t y0[2];
-      ogg_int32_t x[2];
-      ogg_int32_t y[2];
-      ogg_int32_t dxdi[2];
-      ogg_int32_t dydi[2];
-      ogg_int32_t dxdj[2];
-      ogg_int32_t dydj[2];
-      int         i[2];
-      int         k;
-      od_mc_setup_mvc(x0+0,dxdi+0,dxdj+0,i,_mvx,
-       MIDXS[3]/*0,0,3,3*/,r,_log_blk_sz);
-      od_mc_setup_mvc(y0+0,dydi+0,dydj+0,i,_mvy,
-       MIDXS[3]/*0,0,3,3*/,r,_log_blk_sz);
-      od_mc_setup_mvc(x0+1,dxdi+1,dxdj+1,i,_mvx,
-       MIDXS[4]/*1,1,2,2*/,r,_log_blk_sz);
-      od_mc_setup_mvc(y0+1,dydi+1,dydj+1,i,_mvy,
-       MIDXS[4]/*1,1,2,2*/,r,_log_blk_sz);
-      for(i[1]=0;i[1]<=blk_sz;i[1]++){
-        unsigned char *dst;
-        for(k=0;k<2;k++){
-          x[k]=x0[k];
-          y[k]=y0[k];
-          x0[k]+=dxdj[k];
-          y0[k]+=dydj[k];
-        }
-        dst=_dst;
-        for(i[0]=0;i[0]<=blk_sz;i[0]++){
-          ogg_int32_t c[3];
-          for(k=0;k<2;k++){
-            const unsigned char *p;
-            ogg_int32_t          xf;
-            ogg_int32_t          yf;
-            xf=x[k]&0x3FFFF;
-            yf=y[k]&0x3FFFF;
-            p=_src+(x[k]>>18)+i[0]+((y[k]>>18)+i[1])*_systride;
-            c[k]=(p[0]*(0x40000-xf)+p[1]*xf>>18)*(0x40000-yf)+
-             (p[_systride]*(0x40000-xf)+p[_systride+1]*xf>>18)*yf>>18;
-            x[k]+=dxdi[k];
-            y[k]+=dydi[k];
-          }
-          printf("%2X<%8.4lf,%8.4lf>",blk_sz-i[r],
-           (x[0]-dxdi[0])/(double)0x40000,
-           (y[0]-dydi[0])/(double)0x40000);
-          printf("%2X<%8.4lf,%8.4lf>%s",i[r],
-           (x[1]-dxdi[1])/(double)0x40000,
-           (y[1]-dydi[1])/(double)0x40000,i[0]<blk_sz?"::":"\n");
-          dst[0]=(unsigned char)(c[0]*(blk_sz-i[r])+c[1]*i[r]>>_log_blk_sz);
-          dst++;
-        }
-        _dst+=_dystride;
-      }
-    }break;
-    case OD_MC_INTERP_VBBV:r++;
-    case OD_MC_INTERP_BBVV:r++;
-    case OD_MC_INTERP_BVVB:r++;
-    case OD_MC_INTERP_VVBB:{
-      const unsigned char *mvp;
-      ogg_int32_t          mvxf;
-      ogg_int32_t          mvyf;
-      ogg_int32_t          x0[3];
-      ogg_int32_t          y0[3];
-      ogg_int32_t          x[3];
-      ogg_int32_t          y[3];
-      ogg_int32_t          dxdi[3];
-      ogg_int32_t          dydi[3];
-      ogg_int32_t          dxdj[3];
-      ogg_int32_t          dydj[3];
-      ogg_int32_t          ddxdidj[3];
-      ogg_int32_t          ddydidj[3];
-      ptrdiff_t            o0;
-      int                  w0[4];
-      int                  w[4];
-      int                  dwdi[4];
-      int                  i;
-      int                  j;
-      int                  k;
-      od_mc_setup_mvc(x0+0,dxdi+0,dxdj+0,ddxdidj+0,_mvx,
-       MIDXS[5]/*0,1,0,0*/,r,_log_blk_sz);
-      od_mc_setup_mvc(y0+0,dydi+0,dydj+0,ddydidj+0,_mvy,
-       MIDXS[5]/*0,1,0,0*/,r,_log_blk_sz);
-      od_mc_setup_mvc(x0+1,dxdi+1,dxdj+1,ddxdidj+1,_mvx,
-       MIDXS[0]/*0,1,2,3*/,r,_log_blk_sz);
-      od_mc_setup_mvc(y0+1,dydi+1,dydj+1,ddydidj+1,_mvy,
-       MIDXS[0]/*0,1,2,3*/,r,_log_blk_sz);
-      od_mc_setup_mvc(x0+2,dxdi+2,dxdj+2,ddxdidj+2,_mvx,
-       MIDXS[6]/*2,1,2,2*/,r,_log_blk_sz);
-      od_mc_setup_mvc(y0+2,dydi+2,dydj+2,ddydidj+2,_mvy,
-       MIDXS[6]/*2,1,2,2*/,r,_log_blk_sz);
-      mvp=_src+(_mvx[3+r&3]>>18)+(_mvy[3+r&3]>>18)*_systride;
-      mvxf=_mvx[3+r&3]&0x3FFFF;
-      mvyf=_mvy[3+r&3]&0x3FFFF;
-      o0=0;
-      w0[0-r&3]=1<<log_blk_sz2;
-      w0[1-r&3]=w0[2-r&3]=w0[3-r&3]=0;
-      dwdi[0-r&3]=-blk_sz;
-      dwdi[1-r&3]=blk_sz;
-      dwdi[2-r&3]=dwdi[3-r&3]=0;
-      for(k=0;k<3;k++){
-        x[k]=x0[k];
-        y[k]=y0[k];
-      }
-      for(j=0;j<=blk_sz;j++){
-        unsigned char *dst;
-        ptrdiff_t      o;
-        o=o0;
-        dst=_dst;
-        for(k=0;k<4;k++)w[k]=w0[k];
-        for(i=0;i<=blk_sz;i++){
-          const unsigned char *p;
-          ogg_int32_t          c[4];
-          ogg_int32_t          xf;
-          ogg_int32_t          yf;
-          for(k=0;k<3;k++){
-            p=_src+o+(x[k]>>18)+(y[k]>>18)*_systride;
-            xf=x[k]&0x3FFFF;
-            yf=y[k]&0x3FFFF;
-            c[k]=(p[0]*(0x40000-xf)+p[1]*xf>>18)*(0x40000-yf)+
-             (p[_systride]*(0x40000-xf)+p[_systride+1]*xf>>18)*yf>>18;
-            x[k]+=dxdi[k];
-            y[k]+=dydi[k];
-          }
-          p=mvp+o;
-          c[3]=(p[0]*(0x40000-mvxf)+p[1]*mvxf>>18)*(0x40000-mvyf)+
-           (p[_systride]*(0x40000-mvxf)+p[_systride+1]*mvxf>>18)*mvyf>>18;
-          printf("%3X<%8.4lf,%8.4lf>",w[0],
-           (x[0]-dxdi[0])/(double)0x40000,
-           (y[0]-dydi[0])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[1],
-           (x[1]-dxdi[1])/(double)0x40000,
-           (y[1]-dydi[1])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[2],
-           (x[2]-dxdi[2])/(double)0x40000,
-           (y[2]-dydi[2])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>%s",w[3],
-           _mvx[3+r&3]/(double)0x40000,
-           _mvy[3+r&3]/(double)0x40000,i<blk_sz?"::":"\n");
-          dst[0]=(unsigned char)(
-           c[0]*w[0]+c[1]*w[1]+c[2]*w[2]+c[3]*w[3]>>log_blk_sz2);
-          o++;
-          dst++;
-          for(k=0;k<4;k++)w[k]+=dwdi[k];
-        }
-        for(k=0;k<3;k++){
-          x0[k]+=dxdj[k];
-          y0[k]+=dydj[k];
-          x[k]=x0[k];
-          y[k]=y0[k];
-          dxdi[k]+=ddxdidj[k];
-          dydi[k]+=ddydidj[k];
-        }
-        o0+=_systride;
-        _dst+=_dystride;
-        w0[0-r&3]-=blk_sz;
-        w0[3-r&3]+=blk_sz;
-        dwdi[0-r&3]++;
-        dwdi[1-r&3]--;
-        dwdi[2-r&3]++;
-        dwdi[3-r&3]--;
-      }
-    }break;
-    case OD_MC_INTERP_BBBV:r++;
-    case OD_MC_INTERP_BBVB:r++;
-    case OD_MC_INTERP_BVBB:r++;
-    case OD_MC_INTERP_VBBB:{
-      const unsigned char *mvp[2];
-      ogg_int32_t          mvxf[2];
-      ogg_int32_t          mvyf[2];
-      ogg_int32_t          x0[2];
-      ogg_int32_t          y0[2];
-      ogg_int32_t          x[2];
-      ogg_int32_t          y[2];
-      ogg_int32_t          dxdi[2];
-      ogg_int32_t          dydi[2];
-      ogg_int32_t          dxdj[2];
-      ogg_int32_t          dydj[2];
-      ogg_int32_t          ddxdidj;
-      ogg_int32_t          ddydidj;
-      ptrdiff_t            o0;
-      int                  w0[4];
-      int                  w[4];
-      int                  dwdi[4];
-      int                  i;
-      int                  j;
-      int                  k;
-      od_mc_setup_mvc(x0+0,dxdi+0,dxdj+0,&ddxdidj,_mvx,
-       MIDXS[5]/*0,1,0,0*/,r,_log_blk_sz);
-      od_mc_setup_mvc(y0+0,dydi+0,dydj+0,&ddydidj,_mvy,
-       MIDXS[5]/*0,1,0,0*/,r,_log_blk_sz);
-      od_mc_setup_mvc(x0+1,dxdi+1,dxdj+1,&ddxdidj,_mvx,
-       MIDXS[7]/*0,1,1,1*/,r,_log_blk_sz);
-      od_mc_setup_mvc(y0+1,dydi+1,dydj+1,&ddydidj,_mvy,
-       MIDXS[7]/*0,1,1,1*/,r,_log_blk_sz);
-      for(k=0;k<2;k++){
-        mvp[k]=_src+(_mvx[2+k+r&3]>>18)+(_mvy[2+k+r&3]>>18)*_systride;
-        mvxf[k]=_mvx[2+k+r&3]&0x3FFFF;
-        mvyf[k]=_mvy[2+k+r&3]&0x3FFFF;
-      }
-      o0=0;
-      w0[0-r&3]=1<<log_blk_sz2;
-      w0[1-r&3]=w0[2-r&3]=w0[3-r&3]=0;
-      dwdi[0-r&3]=-blk_sz;
-      dwdi[1-r&3]=blk_sz;
-      dwdi[2-r&3]=dwdi[3-r&3]=0;
-      for(k=0;k<2;k++){
-        x[k]=x0[k];
-        y[k]=y0[k];
-      }
-      for(j=0;j<=blk_sz;j++){
-        unsigned char *dst;
-        ptrdiff_t      o;
-        o=o0;
-        dst=_dst;
-        for(k=0;k<4;k++)w[k]=w0[k];
-        for(i=0;i<=blk_sz;i++){
-          ogg_int32_t c[4];
-          for(k=0;k<2;k++){
-            const unsigned char *p;
-            ogg_int32_t          xf;
-            ogg_int32_t          yf;
-            p=_src+o+(x[k]>>18)+(y[k]>>18)*_systride;
-            xf=x[k]&0x3FFFF;
-            yf=y[k]&0x3FFFF;
-            x[k]+=dxdi[k];
-            y[k]+=dydi[k];
-            c[k]=(p[0]*(0x40000-xf)+p[1]*xf>>18)*(0x40000-yf)+
-             (p[_systride]*(0x40000-xf)+p[_systride+1]*xf>>18)*yf>>18;
-            p=mvp[k]+o;
-            c[k+2]=(p[0]*(0x40000-mvxf[k])+p[1]*mvxf[k]>>18)*
-             (0x40000-mvyf[k])+(p[_systride]*(0x40000-mvxf[k])+
-             p[_systride+1]*mvxf[k]>>18)*mvyf[k]>>18;
-          }
-          printf("%3X<%8.4lf,%8.4lf>",w[0],
-           (x[0]-dxdi[0])/(double)0x40000,(y[0]-dydi[0])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[1],
-           (x[1]-dxdi[1])/(double)0x40000,(y[1]-dydi[1])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[2],
-           _mvx[2+r&3]/(double)0x40000,_mvy[2+r&3]/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>%s",w[3],
-           _mvx[3+r&3]/(double)0x40000,_mvy[3+r&3]/(double)0x40000,
-           i<blk_sz?"::":"\n");
-          dst[0]=(unsigned char)(
-           c[0]*w[0]+c[1]*w[1]+c[2]*w[2]+c[3]*w[3]>>log_blk_sz2);
-          o++;
-          dst++;
-          for(k=0;k<4;k++)w[k]+=dwdi[k];
-        }
-        for(k=0;k<2;k++){
-          x0[k]+=dxdj[k];
-          y0[k]+=dydj[k];
-          x[k]=x0[k];
-          y[k]=y0[k];
-          dxdi[k]+=ddxdidj;
-          dydi[k]+=ddydidj;
-        }
-        o0+=_systride;
-        _dst+=_dystride;
-        w0[0-r&3]-=blk_sz;
-        w0[3-r&3]+=blk_sz;
-        dwdi[0-r&3]++;
-        dwdi[1-r&3]--;
-        dwdi[2-r&3]++;
-        dwdi[3-r&3]--;
-      }
-    }break;
-    case OD_MC_INTERP_BBBB:{
-      const unsigned char *mvp[4];
-      ogg_int32_t          mvxf[4];
-      ogg_int32_t          mvyf[4];
-      ptrdiff_t            o0;
-      int                  w0[4];
-      int                  w[4];
-      int                  dwdi[4];
-      int                  i;
-      int                  j;
-      int                  k;
-      for(k=0;k<4;k++){
-        mvp[k]=_src+(_mvx[k]>>18)+(_mvy[k]>>18)*_systride;
-        mvxf[k]=_mvx[k]&0x3FFFF;
-        mvyf[k]=_mvy[k]&0x3FFFF;
-      }
-      o0=0;
-      w0[0]=1<<log_blk_sz2;
-      w0[1]=w0[2]=w0[3]=0;
-      dwdi[0]=-blk_sz;
-      dwdi[1]=blk_sz;
-      dwdi[2]=dwdi[3]=0;
-      for(j=0;j<=blk_sz;j++){
-        unsigned char *dst;
-        ptrdiff_t      o;
-        o=o0;
-        dst=_dst;
-        for(k=0;k<4;k++)w[k]=w0[k];
-        for(i=0;i<=blk_sz;i++){
-          ogg_int32_t c[4];
-          for(k=0;k<4;k++){
-            const unsigned char *p;
-            p=mvp[k]+o;
-            c[k]=(p[0]*(0x40000-mvxf[k])+p[1]*mvxf[k]>>18)*(0x40000-mvyf[k])+
-             (p[_systride]*(0x40000-mvxf[k])+
-             p[_systride+1]*mvxf[k]>>18)*mvyf[k]>>18;
-          }
-          printf("%3X<%8.4lf,%8.4lf>",w[0],
-           _mvx[0]/(double)0x40000,_mvy[0]/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[1],
-           _mvx[1]/(double)0x40000,_mvy[1]/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[2],
-           _mvx[2]/(double)0x40000,_mvy[2]/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>%s",w[3],
-           _mvx[3]/(double)0x40000,_mvy[3]/(double)0x40000,
-           i<blk_sz?"::":"\n");
-          dst[0]=(unsigned char)(
-           c[0]*w[0]+c[1]*w[1]+c[2]*w[2]+c[3]*w[3]>>log_blk_sz2);
-          o++;
-          dst++;
-          for(k=0;k<4;k++)w[k]+=dwdi[k];
-        }
-        o0+=_systride;
-        _dst+=_dystride;
-        w0[0]-=blk_sz;
-        w0[3]+=blk_sz;
-        dwdi[0]++;
-        dwdi[1]--;
-        dwdi[2]++;
-        dwdi[3]--;
-      }
-    }
-  }
-}
-
-static void od_mc_setup_mvc_split(ogg_int32_t *_x0,ogg_int32_t *_dxdi,
- ogg_int32_t *_dxdj,ogg_int32_t *_ddxdidj,const ogg_int32_t _mvx[4],
- const int _m[4],int _r,int _c,int _s1,int _s3,int _log_blk_sz){
-  ogg_int32_t dx;
-  int         log_blk_sz2;
-  int         k;
-  od_mc_setup_mvc(_x0,_dxdi,_dxdj,_ddxdidj,_mvx,_m,_r,_log_blk_sz);
-  log_blk_sz2=_log_blk_sz<<1;
-  if(_s1){
-    k=_c+1&3;
-    dx=_mvx[_m[_c-_r&3]+_r&3]-_mvx[_m[k-_r&3]+_r&3]>>1;
-    *_x0+=dx*SIDXS[0][k];
-    *_dxdi+=dx*SIDXS[1][k]>>_log_blk_sz;
-    *_dxdj+=dx*SIDXS[2][k]>>_log_blk_sz;
-    *_ddxdidj+=dx*SIDXS[3][k]>>log_blk_sz2;
-  }
-  if(_s3){
-    k=_c+3&3;
-    dx=_mvx[_m[_c-_r&3]+_r&3]-_mvx[_m[k-_r&3]+_r&3]>>1;
-    *_x0+=dx*SIDXS[0][k];
-    *_dxdi+=dx*SIDXS[1][k]>>_log_blk_sz;
-    *_dxdj+=dx*SIDXS[2][k]>>_log_blk_sz;
-    *_ddxdidj+=dx*SIDXS[3][k]>>log_blk_sz2;
-  }
-}
-
-static void od_mc_setup_w_split(int _w0[4],int _dwdi[4],int _dwdj[4],
- int _ddwdidj[4],int _r,int _c,int _s1,int _s3,int _log_blk_sz){
-  int log_blk_sz2;
-  int k;
-  log_blk_sz2=_log_blk_sz<<1;
-  _w0[0-_r&3]=2<<log_blk_sz2;
-  _w0[1-_r&3]=_w0[2-_r&3]=_w0[3-_r&3]=0;
-  _dwdi[0-_r&3]=-2<<_log_blk_sz;
-  _dwdi[1-_r&3]=2<<_log_blk_sz;
-  _dwdi[2-_r&3]=_dwdi[3-_r&3]=0;
-  _dwdj[0-_r&3]=-2<<_log_blk_sz;
-  _dwdj[1-_r&3]=_dwdj[2-_r&3]=0;
-  _dwdj[3-_r&3]=2<<_log_blk_sz;
-  _ddwdidj[0-_r&3]=_ddwdidj[2-_r&3]=2;
-  _ddwdidj[1-_r&3]=_ddwdidj[3-_r&3]=-2;
-  _c=_c-_r&3;
-  if(_s1){
-    k=_c+1&3;
-    _w0[k]>>=1;
-    _w0[_c]+=_w0[k];
-    _dwdi[k]>>=1;
-    _dwdi[_c]+=_dwdi[k];
-    _dwdj[k]>>=1;
-    _dwdj[_c]+=_dwdj[k];
-    _ddwdidj[k]>>=1;
-    _ddwdidj[_c]+=_ddwdidj[k];
-  }
-  if(_s3){
-    k=_c+3&3;
-    _w0[k]>>=1;
-    _w0[_c]+=_w0[k];
-    _dwdi[k]>>=1;
-    _dwdi[_c]+=_dwdi[k];
-    _dwdj[k]>>=1;
-    _dwdj[_c]+=_dwdj[k];
-    _ddwdidj[k]>>=1;
-    _ddwdidj[_c]+=_ddwdidj[k];
-  }
-}
-
-static void od_mc_predict8_split(unsigned char *_dst,int _dystride,
- const unsigned char *_src,int _systride,const ogg_int32_t _mvx[4],
- const ogg_int32_t _mvy[4],int _interp_type,int _c,int _s1,int _s3,
- int _log_blk_sz){
-  int log_blk_sz2;
-  int blk_sz;
-  int r;
-  blk_sz=1<<_log_blk_sz;
-  log_blk_sz2=_log_blk_sz<<1;
-  r=0;
-  switch(_interp_type){
-    case OD_MC_INTERP_VVVV:{
-      ogg_int32_t x0;
-      ogg_int32_t y0;
-      ogg_int32_t x;
-      ogg_int32_t y;
-      ogg_int32_t dxdi;
-      ogg_int32_t dydi;
-      ogg_int32_t dxdj;
-      ogg_int32_t dydj;
-      ogg_int32_t ddxdidj;
-      ogg_int32_t ddydidj;
-      int         i;
-      int         j;
-      od_mc_setup_mvc_split(&x0,&dxdi,&dxdj,&ddxdidj,_mvx,
-       MIDXS[0]/*0,1,2,3*/,0,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(&y0,&dydi,&dydj,&ddydidj,_mvy,
-       MIDXS[0]/*0,1,2,3*/,0,_c,0,0,_log_blk_sz);
-      for(j=0;j<=blk_sz;j++){
-        unsigned char *dst;
-        x=x0;
-        y=y0;
-        dst=_dst;
-        for(i=0;i<=blk_sz;i++){
-          const unsigned char *p;
-          ogg_int32_t          xf;
-          ogg_int32_t          yf;
-          printf("<%8.4lf,%8.4lf>%s",x/(double)0x40000,y/(double)0x40000,
-           i<blk_sz?"::":"\n");
-          xf=x&0x3FFFF;
-          yf=y&0x3FFFF;
-          p=_src+(x>>18)+i+((y>>18)+j)*_systride;
-          dst[0]=(unsigned char)((p[0]*(0x40000-xf)+p[1]*xf>>18)*
-           (0x40000-yf)+(p[_systride]*(0x40000-xf)+
-           p[_systride+1]*xf>>18)*yf>>18);
-          x+=dxdi;
-          y+=dydi;
-          dst++;
-        }
-        x0+=dxdj;
-        y0+=dydj;
-        dxdi+=ddxdidj;
-        dydi+=ddydidj;
-        _dst+=_dystride;
-      }
-    }break;
-    case OD_MC_INTERP_VVVB:r++;
-    case OD_MC_INTERP_VVBV:r++;
-    case OD_MC_INTERP_VBVV:r++;
-    case OD_MC_INTERP_BVVV:{
-      ogg_int32_t x0[3];
-      ogg_int32_t y0[3];
-      ogg_int32_t x[3];
-      ogg_int32_t y[3];
-      ogg_int32_t dxdi[3];
-      ogg_int32_t dydi[3];
-      ogg_int32_t dxdj[3];
-      ogg_int32_t dydj[3];
-      ogg_int32_t ddxdidj[3];
-      ogg_int32_t ddydidj[3];
-      int         w0[4];
-      int         w[4];
-      int         dwdi[4];
-      int         dwdj[4];
-      int         ddwdidj[4];
-      int         i;
-      int         j;
-      int         k;
-      od_mc_setup_mvc_split(x0+0,dxdi+0,dxdj+0,ddxdidj+0,_mvx,
-       MIDXS[1]/*0,0,2,3*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(y0+0,dydi+0,dydj+0,ddydidj+0,_mvy,
-       MIDXS[1]/*0,0,2,3*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(x0+1,dxdi+1,dxdj+1,ddxdidj+1,_mvx,
-       MIDXS[2]/*1,1,2,3*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(y0+1,dydi+1,dydj+1,ddydidj+1,_mvy,
-       MIDXS[2]/*1,1,2,3*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(x0+2,dxdi+2,dxdj+2,ddxdidj+2,_mvx,
-       MIDXS[0]/*0,1,2,3*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(y0+2,dydi+2,dydj+2,ddydidj+2,_mvy,
-       MIDXS[0]/*0,1,2,3*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_w_split(w0,dwdi,dwdj,ddwdidj,r,
-       _c,_s1&&(_c+1-r&3)==1,_s3&&(_c+3-r&3)==0,_log_blk_sz);
-      w0[2]+=w0[3];
-      dwdi[2]+=dwdi[3];
-      dwdj[2]+=dwdj[3];
-      ddwdidj[2]+=ddwdidj[3];
-      for(k=0;k<3;k++){
-        x[k]=x0[k];
-        y[k]=y0[k];
-        w[k]=w0[k];
-      }
-      for(j=0;j<=blk_sz;j++){
-        unsigned char *dst;
-        dst=_dst;
-        for(i=0;i<=blk_sz;i++){
-          ogg_int32_t c[3];
-          for(k=0;k<3;k++){
-            const unsigned char *p;
-            ogg_int32_t          xf;
-            ogg_int32_t          yf;
-            xf=x[k]&0x3FFFF;
-            yf=y[k]&0x3FFFF;
-            p=_src+(x[k]>>18)+i+((y[k]>>18)+j)*_systride;
-            c[k]=(p[0]*(0x40000-xf)+p[1]*xf>>18)*(0x40000-yf)+
-             (p[_systride]*(0x40000-xf)+p[_systride+1]*xf>>18)*yf>>18;
-            x[k]+=dxdi[k];
-            y[k]+=dydi[k];
-          }
-          printf("%3X<%8.4lf,%8.4lf>",w[0],
-           (x[0]-dxdi[0])/(double)0x40000,
-           (y[0]-dydi[0])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[1],
-           (x[1]-dxdi[1])/(double)0x40000,
-           (y[1]-dydi[1])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>%s",w[2],
-           (x[2]-dxdi[2])/(double)0x40000,
-           (y[2]-dydi[2])/(double)0x40000,i<blk_sz?"::":"\n");
-          dst[0]=(unsigned char)(
-           c[0]*w[0]+c[1]*w[1]+c[2]*w[2]>>log_blk_sz2+1);
-          dst++;
-          for(k=0;k<3;k++)w[k]+=dwdi[k];
-        }
-        for(k=0;k<3;k++){
-          x0[k]+=dxdj[k];
-          y0[k]+=dydj[k];
-          w0[k]+=dwdj[k];
-          dxdi[k]+=ddxdidj[k];
-          dydi[k]+=ddydidj[k];
-          dwdi[k]+=ddwdidj[k];
-          x[k]=x0[k];
-          y[k]=y0[k];
-          w[k]=w0[k];
-        }
-        _dst+=_dystride;
-      }
-    }break;
-    case OD_MC_INTERP_VBVB:r++;
-    case OD_MC_INTERP_BVBV:{
-      ogg_int32_t x0[2];
-      ogg_int32_t y0[2];
-      ogg_int32_t x[2];
-      ogg_int32_t y[2];
-      ogg_int32_t dxdi[2];
-      ogg_int32_t dydi[2];
-      ogg_int32_t dxdj[2];
-      ogg_int32_t dydj[2];
-      int         u0;
-      int         u;
-      int         dudi;
-      int         dudj;
-      int         ddudidj;
-      int         i;
-      int         j;
-      int         k;
-      od_mc_setup_mvc_split(x0+0,dxdi+0,dxdj+0,&i,_mvx,
-       MIDXS[3]/*0,0,3,3*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(y0+0,dydi+0,dydj+0,&i,_mvy,
-       MIDXS[3]/*0,0,3,3*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(x0+1,dxdi+1,dxdj+1,&i,_mvx,
-       MIDXS[4]/*1,1,2,2*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(y0+1,dydi+1,dydj+1,&i,_mvy,
-       MIDXS[4]/*1,1,2,2*/,r,_c,0,0,_log_blk_sz);
-      u0=SIDXS[4][r]/*0,0,1,1*/<<log_blk_sz2+1;
-      dudi=SIDXS[5][r]/*1,0,-1,0*/<<_log_blk_sz+1;
-      dudj=SIDXS[6][r]/*0,1,0,-1*/<<_log_blk_sz+1;
-      ddudidj=0;
-      if(_s3&&(_c-r&1))k=_c+3&3;
-      else if(_s1&&!(_c-r&1))k=_c+1&3;
-      else k=-1;
-      if(k>=0){
-        if(k+1-r&2){
-          u0-=SIDXS[0][k]<<log_blk_sz2;
-          dudi-=SIDXS[1][k]/*-1,1,0,0*/<<_log_blk_sz;
-          dudj-=SIDXS[2][k]/*-1,0,0,1*/<<_log_blk_sz;
-          ddudidj-=SIDXS[3][k]/*1,-1,1,-1*/;
-        }
-        else{
-          u0+=SIDXS[0][k]<<log_blk_sz2;
-          dudi+=SIDXS[1][k]/*-1,1,0,0*/<<_log_blk_sz;
-          dudj+=SIDXS[2][k]/*-1,0,0,1*/<<_log_blk_sz;
-          ddudidj+=SIDXS[3][k]/*1,-1,1,-1*/;
-        }
-      }
-      for(j=0;j<=blk_sz;j++){
-        unsigned char *dst;
-        for(k=0;k<2;k++){
-          x[k]=x0[k];
-          y[k]=y0[k];
-          x0[k]+=dxdj[k];
-          y0[k]+=dydj[k];
-        }
-        dst=_dst;
-        u=u0;
-        for(i=0;i<=blk_sz;i++){
-          ogg_int32_t c[3];
-          for(k=0;k<2;k++){
-            const unsigned char *p;
-            ogg_int32_t          xf;
-            ogg_int32_t          yf;
-            xf=x[k]&0x3FFFF;
-            yf=y[k]&0x3FFFF;
-            p=_src+(x[k]>>18)+i+((y[k]>>18)+j)*_systride;
-            c[k]=(p[0]*(0x40000-xf)+p[1]*xf>>18)*(0x40000-yf)+
-             (p[_systride]*(0x40000-xf)+p[_systride+1]*xf>>18)*yf>>18;
-            x[k]+=dxdi[k];
-            y[k]+=dydi[k];
-          }
-          printf("%2X<%8.4lf,%8.4lf>",(1<<log_blk_sz2+1)-u,
-           (x[0]-dxdi[0])/(double)0x40000,
-           (y[0]-dydi[0])/(double)0x40000);
-          printf("%2X<%8.4lf,%8.4lf>%s",u,
-           (x[1]-dxdi[1])/(double)0x40000,
-           (y[1]-dydi[1])/(double)0x40000,i<blk_sz?"::":"\n");
-          dst[0]=(unsigned char)(
-           c[0]*((1<<log_blk_sz2+1)-u)+c[1]*u>>log_blk_sz2+1);
-          dst++;
-          u+=dudi;
-        }
-        _dst+=_dystride;
-        u0+=dudj;
-        dudi+=ddudidj;
-      }
-    }break;
-    case OD_MC_INTERP_VBBV:r++;
-    case OD_MC_INTERP_BBVV:r++;
-    case OD_MC_INTERP_BVVB:r++;
-    case OD_MC_INTERP_VVBB:{
-      const unsigned char *mvp;
-      ogg_int32_t          mvxf;
-      ogg_int32_t          mvyf;
-      ogg_int32_t          x0[3];
-      ogg_int32_t          y0[3];
-      ogg_int32_t          x[3];
-      ogg_int32_t          y[3];
-      ogg_int32_t          dxdi[3];
-      ogg_int32_t          dydi[3];
-      ogg_int32_t          dxdj[3];
-      ogg_int32_t          dydj[3];
-      ogg_int32_t          ddxdidj[3];
-      ogg_int32_t          ddydidj[3];
-      ptrdiff_t            o0;
-      int                  w0[4];
-      int                  w[4];
-      int                  dwdi[4];
-      int                  dwdj[4];
-      int                  ddwdidj[4];
-      int                  i;
-      int                  j;
-      int                  k;
-      od_mc_setup_mvc_split(x0+0,dxdi+0,dxdj+0,ddxdidj+0,_mvx,
-       MIDXS[5]/*0,1,0,0*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(y0+0,dydi+0,dydj+0,ddydidj+0,_mvy,
-       MIDXS[5]/*0,1,0,0*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(x0+1,dxdi+1,dxdj+1,ddxdidj+1,_mvx,
-       MIDXS[0]/*0,1,2,3*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(y0+1,dydi+1,dydj+1,ddydidj+1,_mvy,
-       MIDXS[0]/*0,1,2,3*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(x0+2,dxdi+2,dxdj+2,ddxdidj+2,_mvx,
-       MIDXS[6]/*2,1,2,2*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(y0+2,dydi+2,dydj+2,ddydidj+2,_mvy,
-       MIDXS[6]/*2,1,2,2*/,r,_c,0,0,_log_blk_sz);
-      mvp=_src+(_mvx[3+r&3]>>18)+(_mvy[3+r&3]>>18)*_systride;
-      mvxf=_mvx[3+r&3]&0x3FFFF;
-      mvyf=_mvy[3+r&3]&0x3FFFF;
-      o0=0;
-      od_mc_setup_w_split(w0,dwdi,dwdj,ddwdidj,r,
-       _c,_s1&&(_c-r&2),_s3&&(_c+3-r&2),_log_blk_sz);
-      for(k=0;k<3;k++){
-        x[k]=x0[k];
-        y[k]=y0[k];
-      }
-      for(k=0;k<4;k++)w[k]=w0[k];
-      for(j=0;j<=blk_sz;j++){
-        unsigned char *dst;
-        ptrdiff_t      o;
-        o=o0;
-        dst=_dst;
-        for(i=0;i<=blk_sz;i++){
-          ogg_int32_t    c[4];
-          const unsigned char *p;
-          ogg_int32_t          xf;
-          ogg_int32_t          yf;
-          for(k=0;k<3;k++){
-            p=_src+o+(x[k]>>18)+(y[k]>>18)*_systride;
-            xf=x[k]&0x3FFFF;
-            yf=y[k]&0x3FFFF;
-            c[k]=(p[0]*(0x40000-xf)+p[1]*xf>>18)*(0x40000-yf)+
-             (p[_systride]*(0x40000-xf)+p[_systride+1]*xf>>18)*yf>>18;
-            x[k]+=dxdi[k];
-            y[k]+=dydi[k];
-          }
-          p=mvp+o;
-          c[3]=(p[0]*(0x40000-mvxf)+p[1]*mvxf>>18)*(0x40000-mvyf)+
-           (p[_systride]*(0x40000-mvxf)+p[_systride+1]*mvxf>>18)*mvyf>>18;
-          printf("%3X<%8.4lf,%8.4lf>",w[0],
-           (x[0]-dxdi[0])/(double)0x40000,
-           (y[0]-dydi[0])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[1],
-           (x[1]-dxdi[1])/(double)0x40000,
-           (y[1]-dydi[1])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[2],
-           (x[2]-dxdi[2])/(double)0x40000,
-           (y[2]-dydi[2])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>%s",w[3],
-           _mvx[3+r&3]/(double)0x40000,
-           _mvy[3+r&3]/(double)0x40000,i<blk_sz?"::":"\n");
-          dst[0]=(unsigned char)(
-           c[0]*w[0]+c[1]*w[1]+c[2]*w[2]+c[3]*w[3]>>log_blk_sz2+1);
-          o++;
-          dst++;
-          for(k=0;k<4;k++)w[k]+=dwdi[k];
-        }
-        for(k=0;k<3;k++){
-          x0[k]+=dxdj[k];
-          y0[k]+=dydj[k];
-          dxdi[k]+=ddxdidj[k];
-          dydi[k]+=ddydidj[k];
-          x[k]=x0[k];
-          y[k]=y0[k];
-        }
-        o0+=_systride;
-        _dst+=_dystride;
-        for(k=0;k<4;k++){
-          w0[k]+=dwdj[k];
-          dwdi[k]+=ddwdidj[k];
-          w[k]=w0[k];
-        }
-      }
-    }break;
-    case OD_MC_INTERP_BBBV:r++;
-    case OD_MC_INTERP_BBVB:r++;
-    case OD_MC_INTERP_BVBB:r++;
-    case OD_MC_INTERP_VBBB:{
-      const unsigned char *mvp[2];
-      ogg_int32_t          mvxf[2];
-      ogg_int32_t          mvyf[2];
-      ogg_int32_t          x0[2];
-      ogg_int32_t          y0[2];
-      ogg_int32_t          x[2];
-      ogg_int32_t          y[2];
-      ogg_int32_t          dxdi[2];
-      ogg_int32_t          dydi[2];
-      ogg_int32_t          dxdj[2];
-      ogg_int32_t          dydj[2];
-      ogg_int32_t          ddxdidj;
-      ogg_int32_t          ddydidj;
-      ptrdiff_t            o0;
-      int                  w0[4];
-      int                  w[3];
-      int                  dwdi[4];
-      int                  dwdj[4];
-      int                  ddwdidj[4];
-      int                  i;
-      int                  j;
-      int                  k;
-      od_mc_setup_mvc_split(x0+0,dxdi+0,dxdj+0,&ddxdidj,_mvx,
-       MIDXS[5]/*0,1,0,0*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(y0+0,dydi+0,dydj+0,&ddydidj,_mvy,
-       MIDXS[5]/*0,1,0,0*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(x0+1,dxdi+1,dxdj+1,&ddxdidj,_mvx,
-       MIDXS[7]/*0,1,1,1*/,r,_c,0,0,_log_blk_sz);
-      od_mc_setup_mvc_split(y0+1,dydi+1,dydj+1,&ddydidj,_mvy,
-       MIDXS[7]/*0,1,1,1*/,r,_c,0,0,_log_blk_sz);
-      for(k=0;k<2;k++){
-        mvp[k]=_src+(_mvx[2+k+r&3]>>18)+(_mvy[2+k+r&3]>>18)*_systride;
-        mvxf[k]=_mvx[2+k+r&3]&0x3FFFF;
-        mvyf[k]=_mvy[2+k+r&3]&0x3FFFF;
-      }
-      o0=0;
-      od_mc_setup_w_split(w0,dwdi,dwdj,ddwdidj,r,
-       _c,_s1&&(_c+1-r&3)!=1,_s3&&(_c+3-r&2)!=0,_log_blk_sz);
-      for(k=0;k<2;k++){
-        x[k]=x0[k];
-        y[k]=y0[k];
-      }
-      for(k=0;k<4;k++)w[k]=w0[k];
-      for(j=0;j<=blk_sz;j++){
-        unsigned char *dst;
-        ptrdiff_t      o;
-        o=o0;
-        dst=_dst;
-        for(i=0;i<=blk_sz;i++){
-          const unsigned char *p;
-          ogg_int32_t          c[4];
-          ogg_int32_t          xf;
-          ogg_int32_t          yf;
-          for(k=0;k<2;k++){
-            p=_src+o+(x[k]>>18)+(y[k]>>18)*_systride;
-            xf=x[k]&0x3FFFF;
-            yf=y[k]&0x3FFFF;
-            c[k]=(p[0]*(0x40000-xf)+p[1]*xf>>18)*(0x40000-yf)+
-             (p[_systride]*(0x40000-xf)+p[_systride+1]*xf>>18)*yf>>18;
-            x[k]+=dxdi[k];
-            y[k]+=dydi[k];
-            p=mvp[k]+o;
-            c[k+2]=(p[0]*(0x40000-mvxf[k])+p[1]*mvxf[k]>>18)*
-             (0x40000-mvyf[k])+(p[_systride]*(0x40000-mvxf[k])+
-             p[_systride+1]*mvxf[k]>>18)*mvyf[k]>>18;
-          }
-          printf("%3X<%8.4lf,%8.4lf>",w[0],
-           (x[0]-dxdi[0])/(double)0x40000,(y[0]-dydi[0])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[1],
-           (x[1]-dxdi[1])/(double)0x40000,(y[1]-dydi[1])/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[2],
-           _mvx[2+r&3]/(double)0x40000,_mvy[2+r&3]/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>%s",w[3],
-           _mvx[3+r&3]/(double)0x40000,_mvy[3+r&3]/(double)0x40000,
-           i<blk_sz?"::":"\n");
-          dst[0]=c[0]*w[0]+c[1]*w[1]+c[2]*w[2]+c[3]*w[3]>>log_blk_sz2+1;
-          o++;
-          dst++;
-          for(k=0;k<4;k++)w[k]+=dwdi[k];
-        }
-        for(k=0;k<2;k++){
-          x0[k]+=dxdj[k];
-          y0[k]+=dydj[k];
-          dxdi[k]+=ddxdidj;
-          dydi[k]+=ddydidj;
-          x[k]=x0[k];
-          y[k]=y0[k];
-        }
-        o0+=_systride;
-        _dst+=_dystride;
-        for(k=0;k<4;k++){
-          w0[k]+=dwdj[k];
-          dwdi[k]+=ddwdidj[k];
-          w[k]=w0[k];
-        }
-      }
-    }break;
-    case OD_MC_INTERP_BBBB:{
-      const unsigned char *mvp[4];
-      ogg_int32_t          mvxf[4];
-      ogg_int32_t          mvyf[4];
-      ptrdiff_t            o0;
-      int                  w0[4];
-      int                  w[4];
-      int                  dwdi[4];
-      int                  dwdj[4];
-      int                  ddwdidj[4];
-      int                  i;
-      int                  j;
-      int                  k;
-      for(k=0;k<4;k++){
-        mvp[k]=_src+(_mvx[k]>>18)+(_mvy[k]>>18)*_systride;
-        mvxf[k]=_mvx[k]&0x3FFFF;
-        mvyf[k]=_mvy[k]&0x3FFFF;
-      }
-      o0=0;
-      od_mc_setup_w_split(w0,dwdi,dwdj,ddwdidj,r,_c,_s1,_s3,_log_blk_sz);
-      for(k=0;k<4;k++)w[k]=w0[k];
-      for(j=0;j<=blk_sz;j++){
-        unsigned char *dst;
-        ptrdiff_t      o;
-        o=o0;
-        dst=_dst;
-        for(i=0;i<=blk_sz;i++){
-          ogg_int32_t c[4];
-          for(k=0;k<4;k++){
-            const unsigned char *p;
-            p=mvp[k]+o;
-            c[k]=(p[0]*(0x40000-mvxf[k])+p[1]*mvxf[k]>>18)*(0x40000-mvyf[k])+
-             (p[_systride]*(0x40000-mvxf[k])+
-             p[_systride+1]*mvxf[k]>>18)*mvyf[k]>>18;
-          }
-          printf("%3X<%8.4lf,%8.4lf>",w[0],
-           _mvx[0]/(double)0x40000,_mvy[0]/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[1],
-           _mvx[1]/(double)0x40000,_mvy[1]/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>",w[2],
-           _mvx[2]/(double)0x40000,_mvy[2]/(double)0x40000);
-          printf("%3X<%8.4lf,%8.4lf>%s",w[3],
-           _mvx[3]/(double)0x40000,_mvy[3]/(double)0x40000,
-           i<blk_sz?"::":"\n");
-          dst[0]=(unsigned char)(
-           c[0]*w[0]+c[1]*w[1]+c[2]*w[2]+c[3]*w[3]>>log_blk_sz2+1);
-          o++;
-          dst++;
-          for(k=0;k<4;k++)w[k]+=dwdi[k];
-        }
-        o0+=_systride;
-        _dst+=_dystride;
-        for(k=0;k<4;k++){
-          w0[k]+=dwdj[k];
-          dwdi[k]+=ddwdidj[k];
-          w[k]=w0[k];
-        }
-      }
-    }
-  }
-}
-
-#if 1
-#include <stdio.h>
-
-static unsigned char mask[4][4]={/*Each value 0...15 appears exactly once;
-   fill_mvs() and fill_img() derive their test data from this table.*/
-  {0, 8, 2,10},
-  {12,4,14, 6},
-  {3, 11,1, 9},
-  {15,7,13, 5}
-};
-
-static unsigned char img[16*7][16*7];/*Source image: fill_img() writes 7x7
-   blocks of up to 16x16 pixels each into it.*/
-
-static ogg_int32_t mvs[4][4][2];/*Test vectors, [row][column][0=x, 1=y].*/
-static ogg_int32_t mvs2[4][4][2];/*Alternate vectors that main() substitutes
-   at one corner for the split-block tests.*/
-
-static int edge_types[4][4]={/*Interpolation type passed to od_mc_predict8()
-   for block [j][i]; main() prints each of bits 0...3 as B when set and V
-   when clear. Each value 0x0...0xF appears exactly once.*/
-  {0x0,0x1,0x2,0x8},
-  {0x4,0x6,0xA,0xC},
-  {0x5,0x7,0xE,0xD},
-  {0x3,0xF,0xB,0x9}
-};
-
-static int edge_types2[8][8]={/*Starting interpolation type for each
-   split-block test; main() indexes this with
-   [j<<1|(c>>1)][i<<1|((c+1&3)>>1)] and adjusts some bits before use.*/
-  {0x0,0x4,0x1,0x5,0x0,0x2,0xA,0x8},
-  {0x2,0x9,0x0,0x1,0x2,0xA,0x8,0x0},
-  {0x6,0x8,0x2,0xA,0xC,0x6,0xC,0x4},
-  {0x5,0x4,0x6,0xE,0x9,0x3,0xF,0xD},
-  {0x1,0x5,0x3,0xF,0xA,0xE,0xB,0x9},
-  {0x6,0xD,0x4,0x7,0xE,0xF,0xE,0xC},
-  {0x5,0x7,0xB,0xF,0xD,0x7,0xB,0xD},
-  {0x3,0xB,0xC,0x7,0x9,0x3,0x8,0x1}
-};
-
-static void fill_mvs(int _log_blk_sz){/*Fill mvs[][] and mvs2[][] with test
-   motion vectors derived from mask[][].
-  _log_blk_sz: Added to every shift amount, so the vectors grow with the
-                block size being tested.
-  Each component XORs two terms: (a mask entry minus 8) shifted left by
-   _log_blk_sz+15, and another mask entry shifted left by _log_blk_sz+12.
-  The operators + and - bind tighter than <<, which binds tighter than &,
-   which binds tighter than ^, so j+2&3 reads (j+2)&3 and a<<b+15^c<<d+12
-   reads (a<<(b+15))^(c<<(d+12)).
-  mvs2 uses the same construction with the mask offsets permuted.
-  NOTE(review): mask[...]-8 can be negative, and left-shifting a negative
-   value is undefined behavior in ISO C.*/
-  int i;
-  int j;
-  for(j=0;j<4;j++){
-    for(i=0;i<4;i++){
-      mvs[j][i][0]=(mask[j+0&3][i+0&3]-8)<<_log_blk_sz+15^
-       mask[j+2&3][i+2&3]<<_log_blk_sz+12;
-      mvs[j][i][1]=(mask[j+1&3][i+1&3]-8)<<_log_blk_sz+15^
-       mask[j+3&3][i+3&3]<<_log_blk_sz+12;
-      mvs2[j][i][0]=(mask[j+3&3][i+3&3]-8)<<_log_blk_sz+15^
-       mask[j+1&3][i+1&3]<<_log_blk_sz+12;
-      mvs2[j][i][1]=(mask[j+2&3][i+2&3]-8)<<_log_blk_sz+15^
-       mask[j+0&3][i+0&3]<<_log_blk_sz+12;
-    }
-  }
-}
-
-static void fill_img(int _log_blk_sz){/*Fill img[][] with a 7x7 grid of flat
-   blocks, then print its upper-left 6x6 blocks to stdout in hex.
-  _log_blk_sz: The log base 2 of the block size; each block is
-                (1<<_log_blk_sz) pixels square.*/
-  int i;
-  int j;
-  for(j=0;j<7;j++){
-    for(i=0;i<7;i++){
-      int c;
-      int x;
-      int y;
-      c=mask[j&3][i&3];/*The 4x4 mask repeats across the 7x7 grid.*/
-      c=c<<4|15;/*Mask value in the high nibble, low nibble all ones.*/
-      for(y=j<<_log_blk_sz;y<(j+1)<<_log_blk_sz;y++){
-        for(x=i<<_log_blk_sz;x<(i+1)<<_log_blk_sz;x++){
-          img[y][x]=c;
-        }
-      }
-    }
-  }
-  for(j=0;j<6<<_log_blk_sz;j++){/*I.e., j<(6<<_log_blk_sz).*/
-    for(i=0;i<6<<_log_blk_sz;i++){
-      printf("%2X%c",img[j][i],i+1<6<<_log_blk_sz?' ':'\n');
-    }
-  }
-}
-
-int main(void){/*Test driver.
-  For block sizes 4, 8 and 16 and each of the 4x4 test blocks, this predicts
-   the whole block with od_mc_predict8(), then predicts each of its four
-   quadrants (c=0...3, clockwise from the upper-left) with
-   od_mc_predict8_split() and compares selected edges of each quadrant
-   against the whole-block result or against a previously predicted quadrant.
-  Everything is printed to stdout; mismatching samples are prefixed with an
-   exclamation mark.
-  Always returns 0, even when mismatches are found.
-  NOTE(review): memset() is called below, but no #include <string.h> is
-   visible near this code; confirm a prototype is in scope.*/
-  int log_blk_sz;
-  for(log_blk_sz=2;log_blk_sz<=4;log_blk_sz++){
-    int blk_sz;
-    int i;
-    int j;
-    blk_sz=1<<log_blk_sz;
-    fill_img(log_blk_sz);
-    fill_mvs(log_blk_sz);
-    for(j=0;j<4;j++){
-      for(i=0;i<4;i++){
-        ogg_int32_t   mvx[4];
-        ogg_int32_t   mvy[4];
-        unsigned char dst[17][17];/*(blk_sz+1) squared at blk_sz==16.*/
-        unsigned char dst2[4][9][9];/*One (blk_sz/2+1) square per quadrant.*/
-        unsigned      mismatch[4][9][9];
-        int           etype;
-        int           x;
-        int           y;
-        int           c;
-        /*Corner vectors, wrapping around the 4x4 table; they are printed
-           below laid out as 0 1 over 3 2.*/
-        mvx[0]=mvs[j][i][0];
-        mvy[0]=mvs[j][i][1];
-        mvx[1]=mvs[j][i+1&3][0];
-        mvy[1]=mvs[j][i+1&3][1];
-        mvx[2]=mvs[j+1&3][i+1&3][0];
-        mvy[2]=mvs[j+1&3][i+1&3][1];
-        mvx[3]=mvs[j+1&3][i][0];
-        mvy[3]=mvs[j+1&3][i][1];
-        etype=edge_types[j][i];
-        printf("Block (%i,%i): size %i, interpolation type: %c%c%c%c (0x%X)\n",
-         i,j,1<<log_blk_sz,
-         etype&1?'B':'V',etype&2?'B':'V',
-         etype&4?'B':'V',etype&8?'B':'V',etype);
-        printf("<%8.4lf,%8.4lf> <%8.4lf,%8.4lf>\n",
-         mvx[0]/(double)0x40000,mvy[0]/(double)0x40000,
-         mvx[1]/(double)0x40000,mvy[1]/(double)0x40000);
-        printf("<%8.4lf,%8.4lf> <%8.4lf,%8.4lf>\n",
-         mvx[3]/(double)0x40000,mvy[3]/(double)0x40000,
-         mvx[2]/(double)0x40000,mvy[2]/(double)0x40000);
-        /*The source pointer is the block at grid position (i+1,j+1) of
-           img.*/
-        od_mc_predict8(dst[0],sizeof(dst[0]),
-         img[j+1<<log_blk_sz]+(i+1<<log_blk_sz),sizeof(img[0]),mvx,mvy,
-         etype,log_blk_sz);
-        for(y=0;y<=blk_sz;y++){
-          for(x=0;x<=blk_sz;x++){
-            printf("%2X%c",dst[y][x],x<blk_sz?' ':'\n');
-          }
-        }
-        printf("\n");
-        for(c=0;c<4;c++){
-          int s1;
-          int s3;
-          mvx[0]=mvs[j][i][0];
-          mvy[0]=mvs[j][i][1];
-          mvx[1]=mvs[j][i+1&3][0];
-          mvy[1]=mvs[j][i+1&3][1];
-          mvx[2]=mvs[j+1&3][i+1&3][0];
-          mvy[2]=mvs[j+1&3][i+1&3][1];
-          mvx[3]=mvs[j+1&3][i][0];
-          mvy[3]=mvs[j+1&3][i][1];
-          /*Replace the corner opposite corner c with the alternate vector.*/
-          mvx[c+2&3]=mvs2[j][i][0];
-          mvy[c+2&3]=mvs2[j][i][1];
-          etype=edge_types2[j<<1|(c>>1)][i<<1|((c+1&3)>>1)];
-          /*Even c: copy bit c of etype into bit (c+1)&3 and set s1.
-            NOTE(review): The c==3 branch cannot be reached while the
-             condition only admits even c.*/
-          if(0||!(c&1)){
-            etype&=~(1<<(c+1&3));
-            if(c==3)etype|=(etype&8)>>3;
-            else etype|=(etype&1<<c)<<1;
-            s1=1;
-          }
-          else s1=0;
-          /*Tests bit c of etype; if taken, the vector at corner (c+1)&3
-             becomes the average of the vectors at corners c and (c+1)&3.*/
-          if(!s1||!(etype>>c&1)){
-            mvx[c+1&3]=mvx[c]+mvx[c+1&3]>>1;
-            mvy[c+1&3]=mvy[c]+mvy[c+1&3]>>1;
-          }
-          /*Odd c: copy bit (c+3)&3 of etype into bit (c+2)&3 and set s3.*/
-          if(0||(c&1)){
-            etype&=~(1<<(c+2&3));
-            if(c==1)etype|=(etype&1)<<3;
-            else etype|=(etype&1<<(c+3&3))>>1;
-            s3=1;
-          }
-          else s3=0;
-          /*Tests bit (c+3)&3 of etype; if taken, the vector at corner
-             (c+3)&3 becomes the average of the vectors at corners c and
-             (c+3)&3.*/
-          if(!s3||!(etype<<(-c&3)&8)){
-            mvx[c+3&3]=mvx[c]+mvx[c+3&3]>>1;
-            mvy[c+3&3]=mvy[c]+mvy[c+3&3]>>1;
-          }
-          /*The quadrant position is printed as a decimal .0 or .5 suffix.*/
-          printf("Block (%i.%i,%i.%i): size %i, "
-           "interpolation type: %c%c%c%c (0x%X)\n",
-           i,((c+1&3)>>1)*5,j,(c>>1)*5,1<<log_blk_sz-1,
-           etype&1?'B':'V',etype&2?'B':'V',
-           etype&4?'B':'V',etype&8?'B':'V',etype);
-          printf("<%9.5lf,%9.5lf> <%9.5lf,%9.5lf>\n",
-           mvx[0]/(double)0x40000,mvy[0]/(double)0x40000,
-           mvx[1]/(double)0x40000,mvy[1]/(double)0x40000);
-          printf("<%9.5lf,%9.5lf> <%9.5lf,%9.5lf>\n",
-           mvx[3]/(double)0x40000,mvy[3]/(double)0x40000,
-           mvx[2]/(double)0x40000,mvy[2]/(double)0x40000);
-          od_mc_predict8_split(dst2[c][0],sizeof(dst2[c][0]),
-           img[(j+1<<1|(c>>1))<<log_blk_sz-1]+
-           ((i+1<<1|((c+1&3)>>1))<<log_blk_sz-1),
-           sizeof(img[0]),mvx,mvy,etype,c,s1,s3,log_blk_sz-1);
-          memset(mismatch[c][0],0,sizeof(mismatch[c]));
-          switch(c){
-            case 0:{
-              /*Top row against the left half of the top row of dst.*/
-              for(x=0;x<=blk_sz>>1;x++){
-                if(dst2[c][0][x]!=dst[0][x])mismatch[c][0][x]++;
-              }
-              /*for(y=1;y<=blk_sz>>1;y++){
-                if(dst2[c][y][0]!=dst[y][0])mismatch[c][y][0]++;
-              }*/
-            }break;
-            case 1:{
-              /*Top row against the right half of the top row of dst.*/
-              for(x=0;x<=blk_sz>>1;x++){
-                if(dst2[c][0][x]!=dst[0][x+(blk_sz>>1)])mismatch[c][0][x]++;
-              }
-              /*for(y=1;y<=blk_sz>>1;y++){
-                if(dst2[c][y][blk_sz>>1]!=dst[y][blk_sz]){
-                  mismatch[c][y][blk_sz>>1]++;
-                }
-              }*/
-              /*Left column against the right column of quadrant 0.*/
-              for(y=0;y<=blk_sz>>1;y++){
-                if(dst2[c][y][0]!=dst2[0][y][blk_sz>>1])mismatch[c][y][0]++;
-              }
-            }break;
-            case 2:{
-              /*Top row against the bottom row of quadrant 1.*/
-              for(x=0;x<=blk_sz>>1;x++){
-                if(dst2[c][0][x]!=dst2[1][blk_sz>>1][x])mismatch[c][0][x]++;
-              }
-              /*for(y=0;y<=blk_sz>>1;y++){
-                if(dst2[c][y][blk_sz>>1]!=dst[y+(blk_sz>>1)][blk_sz]){
-                  mismatch[c][y][blk_sz>>1]++;
-                }
-              }*/
-              /*Bottom row against the right half of the bottom row of dst.
-                NOTE(review): This loop uses < where the others use <=, so
-                 the last sample is not compared; confirm that is intended.*/
-              for(x=0;x<blk_sz>>1;x++){
-                if(dst2[c][blk_sz>>1][x]!=dst[blk_sz][x+(blk_sz>>1)]){
-                  mismatch[c][blk_sz>>1][x]++;
-                }
-              }
-            }break;
-            case 3:{
-              /*Top row against the bottom row of quadrant 0.*/
-              for(x=0;x<=blk_sz>>1;x++){
-                if(dst2[c][0][x]!=dst2[0][blk_sz>>1][x])mismatch[c][0][x]++;
-              }
-              /*Right column against the left column of quadrant 2.*/
-              for(y=1;y<=blk_sz>>1;y++){
-                if(dst2[c][y][blk_sz>>1]!=dst2[2][y][0]){
-                  mismatch[c][y][blk_sz>>1]++;
-                }
-              }
-              /*Bottom row against the left half of the bottom row of dst.*/
-              for(x=0;x<=blk_sz>>1;x++){
-                if(dst2[c][blk_sz>>1][x]!=dst[blk_sz][x]){
-                  mismatch[c][blk_sz>>1][x]++;
-                }
-              }
-              /*for(y=0;y<blk_sz>>1;y++){
-                if(dst2[c][y][0]!=dst[y+(blk_sz>>1)][0])mismatch[c][y][0]++;
-              }*/
-            }break;
-          }
-          for(y=0;y<=blk_sz>>1;y++){
-            for(x=0;x<=blk_sz>>1;x++){
-              printf("%c%2X",mismatch[c][y][x]?'!':' ',dst2[c][y][x]);
-            }
-            printf("\n");
-          }
-          printf("\n");
-        }
-      }
-    }
-  }
-  return 0;
-}
-
-#endif
diff --git a/src/mc-re.c b/src/mc-re.c
deleted file mode 100644 (file)
index 1134181..0000000
+++ /dev/null
@@ -1,1955 +0,0 @@
-/*Daala video codec
-Copyright (c) 2006-2010 Daala project contributors.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
-
-#include <stdio.h>
-#include <stddef.h>
-#include "mc.h"
-
-
-
-/*A table of indices used to set up the rotated versions of each vector
-   interpolation formula.
-  Each row holds four motion vector indices in the range 0...3; the trailing
-   comment on each row is its row number.
-  NOTE(review): A row is presumably what gets passed as the _m argument of
-   od_mc_setup_mvc() and od_mc_predict1imv8(); the call sites are not in this
-   part of the file, so confirm.*/
-static const int MIDXS[][4]={
-  {0,1,2,3},/*0*/
-  {0,0,2,3},/*1*/
-  {1,1,2,3},/*2*/
-  {0,0,3,3},/*3*/
-  {1,1,2,2},/*4*/
-  {0,1,0,0},/*5*/
-  {2,1,2,2},/*6*/
-  {0,1,1,1},/*7*/
-};
-
-/*Set up the finite differences needed to interpolate a motion vector
-   component.
-  The four results are written through the output pointers; nothing is
-   returned.
-  After rotation, corner 0 supplies the initial value, corner 1 the change
-   across the block in i, corner 3 the change across the block in j, and all
-   four corners the mixed second difference.
-  Each difference is divided by the block dimension(s) with a right shift.
-  In an index expression such as _m[0-_r&3]+_r&3 the operators - and + bind
-   tighter than &, so it reads (_m[(0-_r)&3]+_r)&3.
-  _x0:          The initial value.
-  _dxdi:        The initial amount to increment per unit change in i.
-  _dxdj:        The amount to increment per unit change in j.
-  _ddxdidj:     The amount to increment _dxdi by per unit change in j.
-  _mvx:         The component value of the 4 motion vectors.
-  _m:           The index of the motion vector to use for each corner in the
-                 base orientation.
-  _r:           The amount to rotate (clockwise) the formulas by (0...3).
-  _log_xblk_sz: The log base 2 of the horizontal block dimension.
-  _log_yblk_sz: The log base 2 of the vertical block dimension.*/
-static void od_mc_setup_mvc(ogg_int32_t *_x0,ogg_int32_t *_dxdi,
- ogg_int32_t *_dxdj,ogg_int32_t *_ddxdidj,const ogg_int32_t _mvx[4],
- const int _m[4],int _r,int _log_xblk_sz,int _log_yblk_sz){
-  int k;/*NOTE(review): Never used in this function.*/
-  *_x0=_mvx[_m[0-_r&3]+_r&3];
-  *_dxdi=_mvx[_m[1-_r&3]+_r&3]-*_x0>>_log_xblk_sz;
-  *_dxdj=_mvx[_m[3-_r&3]+_r&3]-*_x0>>_log_yblk_sz;
-  *_ddxdidj=_mvx[_m[0-_r&3]+_r&3]+_mvx[_m[2-_r&3]+_r&3]-
-   _mvx[_m[1-_r&3]+_r&3]-_mvx[_m[3-_r&3]+_r&3]>>_log_xblk_sz+_log_yblk_sz;
-  /*Advance the vector to the (0.5,0.5) position.
-    This takes half a step in j and then half a step in i, folding half of
-     the mixed difference into each first difference along the way.*/
-  *_x0+=*_dxdj>>1;
-  *_dxdi+=*_ddxdidj>>1;
-  *_x0+=*_dxdi>>1;
-  *_dxdj+=*_ddxdidj>>1;
-}
-
-/*Form the prediction given by one interpolated motion vector.
-  The vector is updated for every output pixel by forward differencing (set
-   up by od_mc_setup_mvc()), and the pixel is bilinearly interpolated from
-   the four source samples around the position the vector points to.
-  Vector components are used here with 16 fractional bits: x>>16 is the
-   integer sample offset and x&0xFFFF is the fraction.
-  Source positions advance by two samples per output pixel in each direction
-   (the i<<1 and j<<1 terms).
-  NOTE(review): OD_DIV_POW2_RE is defined outside this part of the file; it
-   is presumably a rounding division by a power of two, so confirm the exact
-   rounding before relying on it.
-  NOTE(review): The disabled debug printf() below scales by 0x40000 (18
-   fractional bits), which does not match the 16 used by the live code.
-  _dst:         The destination buffer (xstride must be 1).
-  _dystride:    The byte offset between destination pixel rows.
-  _src:         The source buffer (xstride must be 1).
-  _systride:    The byte offset between source pixel rows.
-  _mvx:         The X component of the motion vectors.
-  _mvy:         The Y component of the motion vectors.
-  _m:           The index of the motion vector to use for each corner in the
-                 base orientation.
-  _r:           The amount to rotate (clockwise) the formulas by (0...3).
-  _log_xblk_sz: The log base 2 of the horizontal block dimension.
-  _log_yblk_sz: The log base 2 of the vertical block dimension.*/
-static void od_mc_predict1imv8(unsigned char *_dst,int _dystride,
- const unsigned char *_src,int _systride,const ogg_int32_t _mvx[4],
- const ogg_int32_t _mvy[4],const int _m[4],int _r,int _log_xblk_sz,
- int _log_yblk_sz){
-  ogg_int32_t x;
-  ogg_int32_t y;
-  ogg_int32_t x0;
-  ogg_int32_t y0;
-  ogg_int32_t dxdi;
-  ogg_int32_t dydi;
-  ogg_int32_t dxdj;
-  ogg_int32_t dydj;
-  ogg_int32_t ddxdidj;
-  ogg_int32_t ddydidj;
-  int         xblk_sz;
-  int         yblk_sz;
-  int         i;
-  int         j;
-  xblk_sz=1<<_log_xblk_sz;
-  yblk_sz=1<<_log_yblk_sz;
-  od_mc_setup_mvc(&x0,&dxdi,&dxdj,&ddxdidj,_mvx,_m,_r,
-   _log_xblk_sz,_log_yblk_sz);
-  od_mc_setup_mvc(&y0,&dydi,&dydj,&ddydidj,_mvy,_m,_r,
-   _log_xblk_sz,_log_yblk_sz);
-  for(j=0;j<yblk_sz;j++){
-    x=x0;
-    y=y0;
-    for(i=0;i<xblk_sz;i++){
-      const unsigned char *p;
-      ogg_int32_t          xf;
-      ogg_int32_t          yf;
-      ogg_uint32_t         a;
-      ogg_uint32_t         b;
-      /*printf("<%16.12f,%16.12f>%s",x/(double)0x40000,y/(double)0x40000,
-       i+1<xblk_sz?"::":"\n");*/
-      xf=x&0xFFFF;
-      yf=y&0xFFFF;
-      p=_src+(x>>16)+(i<<1)+((y>>16)+(j<<1))*_systride;
-      /*a and b are the horizontal interpolations on the upper and lower
-         source rows; the store below interpolates vertically between them.*/
-      a=OD_DIV_POW2_RE(((ogg_uint32_t)p[0]<<16)+(p[1]-p[0])*xf,16);
-      b=OD_DIV_POW2_RE(((ogg_uint32_t)(p+_systride)[0]<<16)+
-       ((p+_systride)[1]-(p+_systride)[0])*xf,16);
-      _dst[i]=(unsigned char)OD_DIV_POW2_RE((a<<16)+(b-a)*yf,16);
-      x+=dxdi;
-      y+=dydi;
-    }
-    /*Step the row start and the per-pixel increments to the next row.*/
-    x0+=dxdj;
-    y0+=dydj;
-    dxdi+=ddxdidj;
-    dydi+=ddydidj;
-    _dst+=_dystride;
-  }
-  /*_dst-=_dystride*yblk_sz;
-  for(j=0;j<yblk_sz;j++){
-    for(i=0;i<xblk_sz;i++)printf("%2X ",*(_dst+i+j*_dystride));
-    printf("\n");
-  }*/
-}
-
-/*Form the prediction given by one fixed motion vector.
-  The vector is split into an integer sample offset (_mvx>>16, _mvy>>16),
-   which is applied to _src once, and a 16-bit fraction (_mvx&0xFFFF,
-   _mvy&0xFFFF).
-  There is one loop for each combination of zero and non-zero fractions:
-   both non-zero interpolates bilinearly, only X non-zero interpolates
-   horizontally, only Y non-zero interpolates vertically, and both zero
-   copies samples.
-  In every case the source advances by two samples per output pixel and two
-   rows per output row.
-  NOTE(review): OD_DIV_POW2_RE is defined outside this part of the file; it
-   is presumably a rounding division by a power of two, so confirm the exact
-   rounding before relying on it.
-  _dst:         The destination buffer (xstride must be 1).
-  _dystride:    The byte offset between destination pixel rows.
-  _src:         The source buffer (xstride must be 1).
-  _systride:    The byte offset between source pixel rows.
-  _mvx:         The X component of the motion vector.
-  _mvy:         The Y component of the motion vector.
-  _log_xblk_sz: The log base 2 of the horizontal block dimension.
-  _log_yblk_sz: The log base 2 of the vertical block dimension.*/
-static void od_mc_predict1fmv8(unsigned char *_dst,int _dystride,
- const unsigned char *_src,int _systride,ogg_int32_t _mvx,ogg_int32_t _mvy,
- int _log_xblk_sz,int _log_yblk_sz){
-  ogg_uint32_t mvxf;
-  ogg_uint32_t mvyf;
-  int         xblk_sz;
-  int         yblk_sz;
-  int         i;
-  int         j;
-  xblk_sz=1<<_log_xblk_sz;
-  yblk_sz=1<<_log_yblk_sz;
-  _src+=(_mvx>>16)+(_mvy>>16)*_systride;
-  mvxf=(ogg_uint32_t)(_mvx&0xFFFF);
-  mvyf=(ogg_uint32_t)(_mvy&0xFFFF);
-  if(mvxf!=0){
-    if(mvyf!=0){
-      /*Both fractions are non-zero: full bilinear interpolation.*/
-      for(j=0;j<yblk_sz;j++){
-        for(i=0;i<xblk_sz;i++){
-          ogg_uint32_t a;
-          ogg_uint32_t b;
-          /*printf("<%16.12f,%16.12f>%s",_mvx/(double)0x40000,
-           _mvy/(double)0x40000,i+1<xblk_sz?"::":"\n");*/
-          a=OD_DIV_POW2_RE(
-           ((ogg_uint32_t)_src[i<<1]<<16)+(_src[i<<1|1]-_src[i<<1])*mvxf,16);
-          b=OD_DIV_POW2_RE(((ogg_uint32_t)(_src+_systride)[i<<1]<<16)+
-           ((_src+_systride)[i<<1|1]-(_src+_systride)[i<<1])*mvxf,16);
-          _dst[i]=(unsigned char)OD_DIV_POW2_RE((a<<16)+(b-a)*mvyf,16);
-        }
-        _src+=_systride<<1;
-        _dst+=_dystride;
-      }
-    }
-    else{
-      /*Only the X fraction is non-zero: horizontal interpolation.*/
-      for(j=0;j<yblk_sz;j++){
-        for(i=0;i<xblk_sz;i++){
-          /*printf("<%16.12f,%16.12f>%s",_mvx/(double)0x40000,
-           _mvy/(double)0x40000,i+1<xblk_sz?"::":"\n");*/
-          _dst[i]=(unsigned char)OD_DIV_POW2_RE(
-           ((ogg_uint32_t)_src[i<<1]<<16)+(_src[i<<1|1]-_src[i<<1])*mvxf,16);
-        }
-        _src+=_systride<<1;
-        _dst+=_dystride;
-      }
-    }
-  }
-  else{
-    if(mvyf!=0){
-      /*Only the Y fraction is non-zero: vertical interpolation.*/
-      for(j=0;j<yblk_sz;j++){
-        for(i=0;i<xblk_sz;i++){
-          /*printf("<%16.12f,%16.12f>%s",_mvx/(double)0x40000,
-           _mvy/(double)0x40000,i+1<xblk_sz?"::":"\n");*/
-          _dst[i]=(unsigned char)OD_DIV_POW2_RE(((ogg_uint32_t)_src[i<<1]<<16)+
-           ((_src+_systride)[(i<<1)]-_src[i<<1])*mvyf,16);
-        }
-        _src+=_systride<<1;
-        _dst+=_dystride;
-      }
-    }
-    else{
-      /*Both fractions are zero: copy every second sample.*/
-      for(j=0;j<yblk_sz;j++){
-        for(i=0;i<xblk_sz;i++){
-          /*printf("<%16.12f,%16.12f>%s",_mvx/(double)0x40000,
-           _mvy/(double)0x40000,i+1<xblk_sz?"::":"\n");*/
-          _dst[i]=_src[i<<1];
-        }
-        _src+=_systride<<1;
-        _dst+=_dystride;
-      }
-    }
-  }
-  /*_dst-=_dystride*yblk_sz;
-  for(j=0;j<yblk_sz;j++){
-    for(i=0;i<xblk_sz;i++)printf("%2X ",*(_dst+i+j*_dystride));
-    printf("\n");
-  }*/
-}
-
-/*Perform normal bilinear blending.
-  Every output pixel is a weighted sum of the co-located pixels of the four
-   source buffers.
-  _src[0] and _src[1] are blended horizontally, as are _src[3] and _src[2],
-   and the two results are then blended vertically.
-  The weights i and j take the odd values 1, 3, ... up to twice the block
-   dimension minus one, i.e., i/(2*xblk_sz) and j/(2*yblk_sz) as fractions.
-  NOTE(review): This suggests _src[] is ordered upper-left, upper-right,
-   lower-right, lower-left; confirm against the caller.
-  NOTE(review): OD_DIV_POW2_RE is defined outside this part of the file; it
-   is presumably a rounding division by a power of two.
-  _dst:         The destination buffer (xstride must be 1).
-  _dystride:    The byte offset between destination pixel rows.
-  _src:         The four source buffers, all read at the same offset.
-  _systride:    The byte offset between pixel rows, shared by all four source
-                 buffers.
-  _log_xblk_sz: The log base 2 of the horizontal block dimension.
-  _log_yblk_sz: The log base 2 of the vertical block dimension.*/
-static void od_mc_blend_full8(unsigned char *_dst,int _dystride,
- const unsigned char *_src[4],int _systride,int _log_xblk_sz,int _log_yblk_sz){
-  unsigned char *dst0;
-  unsigned char *dst;
-  ptrdiff_t      o;
-  ptrdiff_t      o0;
-  unsigned       a;
-  unsigned       b;
-  int            log_blk_sz2;
-  int            xblk_sz;
-  int            yblk_sz;
-  int            xblk_sz2;
-  int            yblk_sz2;
-  int            i;
-  int            j;
-  xblk_sz=1<<_log_xblk_sz;
-  yblk_sz=1<<_log_yblk_sz;
-  xblk_sz2=xblk_sz<<1;
-  yblk_sz2=yblk_sz<<1;
-  log_blk_sz2=_log_xblk_sz+_log_yblk_sz;
-  o0=0;
-  dst0=_dst;
-  for(j=1;j<yblk_sz2;j+=2){
-    o=o0;
-    dst=dst0;
-    for(i=1;i<xblk_sz2;i+=2){
-      /*a and b are each scaled by 2*xblk_sz; the final sum is scaled by a
-         further 2*yblk_sz, hence the division by 2**(log_blk_sz2+2).*/
-      a=(*(_src[0]+o)<<_log_xblk_sz+1)+(*(_src[1]+o)-*(_src[0]+o))*i;
-      b=(*(_src[3]+o)<<_log_xblk_sz+1)+(*(_src[2]+o)-*(_src[3]+o))*i;
-      dst[0]=(unsigned char)OD_DIV_POW2_RE(
-       (a<<_log_yblk_sz+1)+(b-a)*j,log_blk_sz2+2);
-      o++;
-      dst++;
-    }
-    o0+=_systride;
-    dst0+=_dystride;
-  }
-}
-
-/*Perform multiresolution bilinear blending.*/
-static void od_mc_blend_multi8(unsigned char *_dst,int _dystride,
- const unsigned char *_src[4],int _systride,int _log_xblk_sz,int _log_yblk_sz){
-  const unsigned char *p;
-  unsigned char       *dst0;
-  unsigned char       *dst;
-  ptrdiff_t            o;
-  ptrdiff_t            o0;
-  int                  ll[4];
-  int                  lh;
-  int                  hl;
-  int                  hh;
-  int                  a;
-  int                  b;
-  int                  c;
-  int                  d;
-  int                  log_blk_sz2;
-  int                  xblk_sz;
-  int                  yblk_sz;
-  int                  i;
-  int                  j;
-  xblk_sz=1<<_log_xblk_sz;
-  yblk_sz=1<<_log_yblk_sz;
-  log_blk_sz2=_log_xblk_sz+_log_yblk_sz;
-  o0=0;
-  dst0=_dst;
-  /*Perform multiresolution blending.*/
-  int xblk_sz_2;
-  int yblk_sz_2;
-  xblk_sz_2=xblk_sz>>1;
-  yblk_sz_2=yblk_sz>>1;
-  for(j=1;j<yblk_sz_2;j+=2){
-    o=o0;
-    dst=dst0;
-    /*Upper-left quadrant.*/
-    for(i=1;i<xblk_sz_2;i+=2){
-      p=_src[0]+o;
-      /*Forward Haar wavelet.*/
-      ll[0]=p[0]+p[1];
-      lh=p[0]-p[1];
-      hl=(p+_systride)[0]+(p+_systride)[1];
-      hh=(p+_systride)[0]-(p+_systride)[1];
-      c=ll[0]-hl;
-      ll[0]+=hl;
-      hl=c;
-      /*No need to finish the transform; we'd just invert it later.*/
-      p=_src[1]+o;
-      ll[1]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      p=_src[2]+o;
-      ll[2]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      p=_src[3]+o;
-      ll[3]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      /*LL blending.*/
-      a=(ll[0]<<_log_xblk_sz)+(ll[1]-ll[0])*i;
-      b=(ll[3]<<_log_xblk_sz)+(ll[2]-ll[3])*i;
-      a=(int)OD_DIV_POW2_RE(((ogg_int32_t)a<<_log_yblk_sz)+
-       (ogg_int32_t)(b-a)*j,log_blk_sz2);
-      /*Inverse Haar wavelet.*/
-      c=OD_DIV2_RE(a-hl);
-      a=OD_DIV2_RE(a+hl);
-      d=OD_DIV2_RE(c-hh);
-      c=OD_DIV2_RE(c+hh);
-      b=OD_DIV2_RE(a-lh);
-      a=OD_DIV2_RE(a+lh);
-      dst[0]=od_clamp255(a);
-      dst[1]=od_clamp255(b);
-      (dst+_dystride)[0]=od_clamp255(c);
-      (dst+_dystride)[1]=od_clamp255(d);
-      o+=2;
-      dst+=2;
-    }
-    /*Upper-right quadrant.*/
-    for(;i<xblk_sz;i+=2){
-      p=_src[0]+o;
-      ll[0]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      p=_src[1]+o;
-      /*Forward Haar wavelet.*/
-      ll[1]=p[0]+p[1];
-      lh=p[0]-p[1];
-      hl=(p+_systride)[0]+(p+_systride)[1];
-      hh=(p+_systride)[0]-(p+_systride)[1];
-      c=ll[1]-hl;
-      ll[1]+=hl;
-      hl=c;
-      /*No need to finish the transform; we'd just invert it later.*/
-      p=_src[2]+o;
-      ll[2]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      p=_src[3]+o;
-      ll[3]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      /*LL blending.*/
-      a=(ll[0]<<_log_xblk_sz)+(ll[1]-ll[0])*i;
-      b=(ll[3]<<_log_xblk_sz)+(ll[2]-ll[3])*i;
-      a=(int)OD_DIV_POW2_RE(((ogg_int32_t)a<<_log_yblk_sz)+
-       (ogg_int32_t)(b-a)*j,log_blk_sz2);
-      /*Inverse Haar wavelet.*/
-      c=OD_DIV2_RE(a-hl);
-      a=OD_DIV2_RE(a+hl);
-      d=OD_DIV2_RE(c-hh);
-      c=OD_DIV2_RE(c+hh);
-      b=OD_DIV2_RE(a-lh);
-      a=OD_DIV2_RE(a+lh);
-      dst[0]=od_clamp255(a);
-      dst[1]=od_clamp255(b);
-      (dst+_dystride)[0]=od_clamp255(c);
-      (dst+_dystride)[1]=od_clamp255(d);
-      o+=2;
-      dst+=2;
-    }
-    o0+=_systride<<1;
-    dst0+=_dystride<<1;
-  }
-  for(;j<yblk_sz;j+=2){
-    o=o0;
-    dst=dst0;
-    /*Lower-left quadrant.*/
-    for(i=1;i<xblk_sz_2;i+=2){
-      p=_src[0]+o;
-      ll[0]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      p=_src[1]+o;
-      ll[1]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      p=_src[2]+o;
-      ll[2]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      p=_src[3]+o;
-      /*Forward Haar wavelet.*/
-      ll[3]=p[0]+p[1];
-      lh=p[0]-p[1];
-      hl=(p+_systride)[0]+(p+_systride)[1];
-      hh=(p+_systride)[0]-(p+_systride)[1];
-      c=ll[3]-hl;
-      ll[3]+=hl;
-      hl=c;
-      /*No need to finish the transform; we'd just invert it later.*/
-      /*LL blending.*/
-      a=(ll[0]<<_log_xblk_sz)+(ll[1]-ll[0])*i;
-      b=(ll[3]<<_log_xblk_sz)+(ll[2]-ll[3])*i;
-      a=(int)OD_DIV_POW2_RE(((ogg_int32_t)a<<_log_yblk_sz)+
-       (ogg_int32_t)(b-a)*j,log_blk_sz2);
-      /*Inverse Haar wavelet.*/
-      c=OD_DIV2_RE(a-hl);
-      a=OD_DIV2_RE(a+hl);
-      d=OD_DIV2_RE(c-hh);
-      c=OD_DIV2_RE(c+hh);
-      b=OD_DIV2_RE(a-lh);
-      a=OD_DIV2_RE(a+lh);
-      dst[0]=od_clamp255(a);
-      dst[1]=od_clamp255(b);
-      (dst+_dystride)[0]=od_clamp255(c);
-      (dst+_dystride)[1]=od_clamp255(d);
-      o+=2;
-      dst+=2;
-    }
-    /*Lower-right quadrant.*/
-    for(;i<xblk_sz;i+=2){
-      p=_src[0]+o;
-      ll[0]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      p=_src[1]+o;
-      ll[1]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      p=_src[2]+o;
-      /*Forward Haar wavelet.*/
-      ll[2]=p[0]+p[1];
-      lh=p[0]-p[1];
-      hl=(p+_systride)[0]+(p+_systride)[1];
-      hh=(p+_systride)[0]-(p+_systride)[1];
-      c=ll[2]-hl;
-      ll[2]+=hl;
-      hl=c;
-      /*No need to finish the transform; we'd just invert it later.*/
-      p=_src[3]+o;
-      ll[3]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      /*LL blending.*/
-      a=(ll[0]<<_log_xblk_sz)+(ll[1]-ll[0])*i;
-      b=(ll[3]<<_log_xblk_sz)+(ll[2]-ll[3])*i;
-      a=(int)OD_DIV_POW2_RE(((ogg_int32_t)a<<_log_yblk_sz)+
-       (ogg_int32_t)(b-a)*j,log_blk_sz2);
-      /*Inverse Haar wavelet.*/
-      c=OD_DIV2_RE(a-hl);
-      a=OD_DIV2_RE(a+hl);
-      d=OD_DIV2_RE(c-hh);
-      c=OD_DIV2_RE(c+hh);
-      b=OD_DIV2_RE(a-lh);
-      a=OD_DIV2_RE(a+lh);
-      dst[0]=od_clamp255(a);
-      dst[1]=od_clamp255(b);
-      (dst+_dystride)[0]=od_clamp255(c);
-      (dst+_dystride)[1]=od_clamp255(d);
-      o+=2;
-      dst+=2;
-    }
-    o0+=_systride<<1;
-    dst0+=_dystride<<1;
-  }
-}
-
-/*Sets up the blending weights, and their forward differences, for blending
-   with bilinear weights modified for unsplit edges.
-  The weights start out as plain bilinear weights scaled by 8: at position
-   (i,j) vertex 0 gets 8*(xblk_sz-i)*(yblk_sz-j), vertex 1 gets
-   8*i*(yblk_sz-j), vertex 2 gets 8*i*j, and vertex 3 gets 8*(xblk_sz-i)*j.
-  For each vertex adjacent to corner _c whose edge is not split, half of that
-   vertex's weight is then moved to corner _c.
-  Finally, everything is advanced by half a sample in each direction.
-  _s0:          Returns the four weights at position (0.5,0.5).
-  _dsdi:        Returns the change in each weight for a unit step in i, valid
-                 for the first row.
-  _dsdj:        Returns the change in each weight for a unit step in j, valid
-                 for the first column.
-  _ddsdidj:     Returns the change in _dsdi for a unit step in j.
-  _c:           The corner that receives the weight of unsplit vertices.
-  _s1:          Non-zero if the edge to vertex (_c+1)&3 is split.
-  _s3:          Non-zero if the edge to vertex (_c+3)&3 is split.
-  _log_xblk_sz: The log base 2 of the number of steps in i.
-  _log_yblk_sz: The log base 2 of the number of steps in j.*/
-static void od_mc_setup_s_split(int _s0[4],int _dsdi[4],int _dsdj[4],
- int _ddsdidj[4],int _c,int _s1,int _s3,int _log_xblk_sz,int _log_yblk_sz){
-  int log_blk_sz2;
-  int k;
-  log_blk_sz2=_log_xblk_sz+_log_yblk_sz;
-  _s0[0]=8<<log_blk_sz2;
-  _s0[1]=_s0[2]=_s0[3]=0;
-  /*A unit step in i changes the weights of the first row by 8*yblk_sz (not
-     8*xblk_sz), so that the weight of vertex 0 reaches exactly zero after
-     xblk_sz steps.
-    The negation is applied after the shift, because left-shifting a negative
-     value is undefined.*/
-  _dsdi[0]=-(8<<_log_yblk_sz);
-  _dsdi[1]=8<<_log_yblk_sz;
-  _dsdi[2]=_dsdi[3]=0;
-  /*Likewise, a unit step in j changes the first column by 8*xblk_sz.*/
-  _dsdj[0]=-(8<<_log_xblk_sz);
-  _dsdj[1]=_dsdj[2]=0;
-  _dsdj[3]=8<<_log_xblk_sz;
-  _ddsdidj[0]=_ddsdidj[2]=8;
-  _ddsdidj[1]=_ddsdidj[3]=-8;
-  if(!_s1){
-    /*Move half the weight of the next vertex to corner _c.*/
-    k=(_c+1)&3;
-    _s0[k]>>=1;
-    _s0[_c]+=_s0[k];
-    _dsdi[k]>>=1;
-    _dsdi[_c]+=_dsdi[k];
-    _dsdj[k]>>=1;
-    _dsdj[_c]+=_dsdj[k];
-    _ddsdidj[k]>>=1;
-    _ddsdidj[_c]+=_ddsdidj[k];
-  }
-  if(!_s3){
-    /*Move half the weight of the previous vertex to corner _c.*/
-    k=(_c+3)&3;
-    _s0[k]>>=1;
-    _s0[_c]+=_s0[k];
-    _dsdi[k]>>=1;
-    _dsdi[_c]+=_dsdi[k];
-    _dsdj[k]>>=1;
-    _dsdj[_c]+=_dsdj[k];
-    _ddsdidj[k]>>=1;
-    _ddsdidj[_c]+=_ddsdidj[k];
-  }
-  /*Advance the weights to the (0.5,0.5) position.*/
-  for(k=0;k<4;k++){
-    _s0[k]+=_dsdj[k]>>1;
-    _dsdi[k]+=_ddsdidj[k]>>1;
-    _s0[k]+=_dsdi[k]>>1;
-    _dsdj[k]+=_ddsdidj[k]>>1;
-  }
-}
-
-/*Perform normal blending with bilinear weights modified for unsplit edges.
-  _dst:         The destination block.
-  _dystride:    The distance in bytes between rows of _dst.
-  _src:         The four predictors to blend.
-  _systride:    The distance in bytes between rows of each predictor.
-  _c:           Passed on to od_mc_setup_s_split() to select the weights.
-  _s1:          Passed on to od_mc_setup_s_split() to select the weights.
-  _s3:          Passed on to od_mc_setup_s_split() to select the weights.
-  _log_xblk_sz: The log base 2 of the block width.
-  _log_yblk_sz: The log base 2 of the block height.*/
-static void od_mc_blend_full_split8(unsigned char *_dst,int _dystride,
- const unsigned char *_src[4],int _systride,int _c,int _s1,int _s3,
- int _log_xblk_sz,int _log_yblk_sz){
-  unsigned char *dst0;
-  unsigned char *dst;
-  ptrdiff_t      o;
-  ptrdiff_t      o0;
-  int            s[4];
-  int            s0[4];
-  int            dsdi[4];
-  int            dsdj[4];
-  int            ddsdidj[4];
-  int            xblk_sz;
-  int            yblk_sz;
-  int            log_blk_sz2p3;
-  int            i;
-  int            j;
-  int            k;
-  xblk_sz=1<<_log_xblk_sz;
-  yblk_sz=1<<_log_yblk_sz;
-  o0=0;
-  dst0=_dst;
-  /*The weights sum to 8<<(_log_xblk_sz+_log_yblk_sz), so the weighted sum is
-     scaled back down by that many bits plus 3.*/
-  log_blk_sz2p3=_log_xblk_sz+_log_yblk_sz+3;
-  od_mc_setup_s_split(s0,dsdi,dsdj,ddsdidj,_c,_s1,_s3,
-   _log_xblk_sz,_log_yblk_sz);
-  for(k=0;k<4;k++)s[k]=s0[k];
-  for(j=0;j<yblk_sz;j++){
-    o=o0;
-    dst=dst0;
-    for(i=0;i<xblk_sz;i++){
-      dst[0]=(unsigned char)OD_DIV_POW2_RE(
-       *(_src[0]+o)*s[0]+*(_src[1]+o)*s[1]+
-       *(_src[2]+o)*s[2]+*(_src[3]+o)*s[3],log_blk_sz2p3);
-      o++;
-      dst++;
-      /*Step the weights one sample to the right.*/
-      for(k=0;k<4;k++)s[k]+=dsdi[k];
-    }
-    o0+=_systride;
-    dst0+=_dystride;
-    /*Step the row-start weights, and the horizontal differences, down one
-       row.*/
-    for(k=0;k<4;k++){
-      s0[k]+=dsdj[k];
-      s[k]=s0[k];
-      dsdi[k]+=ddsdidj[k];
-    }
-  }
-}
-
-/*There are other ways to implement multiresolution blending for the modified
-   bilinear weights, but using a table lookup to select which predictor to
-   draw the high-frequency coefficients from moves all the complexity into the
-   tables, and leaves the code dead simple.*/
-
-/*The MV from which to use the high-frequency coefficients for a 2x2 LL band.
-  Indexed by [split][corner][position], with positions in raster order.
-  The first index is 0 when neither adjacent vertex is split, 1 when vertex
-   (corner+1)&3 is split, and 2 when vertex (corner+3)&3 is split.
-  The entry for corner 3 with vertex 0 split is the entry for corner 0 with
-   vertex 3 split, flipped vertically, with the vertex labels exchanged
-   (0<->3, 1<->2).*/
-static const unsigned char OD_MC_SIDXS_22[3][4][4]={
-  {
-    {
-      /*Corner: 0; split: none*/
-      0,0,
-      0,2
-    },{
-      /*Corner: 1; split: none*/
-      1,1,
-      3,1
-    },{
-      /*Corner: 2; split: none*/
-      0,2,
-      2,2
-    },{
-      /*Corner: 3; split: none*/
-      3,1,
-      3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 1*/
-      0,1,
-      0,2
-    },{
-      /*Corner: 1; split: 2*/
-      1,1,
-      3,2
-    },{
-      /*Corner: 2; split: 3*/
-      0,2,
-      3,2
-    },{
-      /*Corner: 3; split: 0*/
-      0,1,
-      3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 3*/
-      0,0,
-      3,2
-    },{
-      /*Corner: 1; split: 0*/
-      0,1,
-      3,1
-    },{
-      /*Corner: 2; split: 1*/
-      0,1,
-      2,2
-    },{
-      /*Corner: 3; split: 2*/
-      3,1,
-      3,2
-    }
-  }
-};
-
-/*The MV from which to use the high-frequency coefficients for a 2x4 LL band.
-  Indexed by [split][corner][position], with positions in raster order.
-  The first index is 0 when neither adjacent vertex is split, 1 when vertex
-   (corner+1)&3 is split, and 2 when vertex (corner+3)&3 is split.
-  The entry for corner 3 with vertex 0 split is the entry for corner 0 with
-   vertex 3 split, flipped vertically, with the vertex labels exchanged
-   (0<->3, 1<->2).*/
-static const unsigned char OD_MC_SIDXS_24[3][4][8]={
-  {
-    {
-      /*Corner: 0; split: none*/
-      0,0,0,0,
-      0,0,2,2
-    },{
-      /*Corner: 1; split: none*/
-      1,1,1,1,
-      3,3,1,1
-    },{
-      /*Corner: 2; split: none*/
-      0,0,2,2,
-      2,2,2,2
-    },{
-      /*Corner: 3; split: none*/
-      3,3,1,1,
-      3,3,3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 1*/
-      0,0,1,1,
-      0,0,2,2
-    },{
-      /*Corner: 1; split: 2*/
-      1,1,1,1,
-      3,3,2,2
-    },{
-      /*Corner: 2; split: 3*/
-      0,0,2,2,
-      3,3,2,2
-    },{
-      /*Corner: 3; split: 0*/
-      0,0,1,1,
-      3,3,3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 3*/
-      0,0,0,0,
-      3,3,2,2
-    },{
-      /*Corner: 1; split: 0*/
-      0,0,1,1,
-      3,3,1,1
-    },{
-      /*Corner: 2; split: 1*/
-      0,0,1,1,
-      2,2,2,2
-    },{
-      /*Corner: 3; split: 2*/
-      3,3,1,1,
-      3,3,2,2
-    }
-  }
-};
-
-/*The MV from which to use the high-frequency coefficients for a 2x8 LL band.
-  Indexed by [split][corner][position], with positions in raster order.
-  The first index is 0 when neither adjacent vertex is split, 1 when vertex
-   (corner+1)&3 is split, and 2 when vertex (corner+3)&3 is split.
-  The entry for corner 3 with vertex 0 split is the entry for corner 0 with
-   vertex 3 split, flipped vertically, with the vertex labels exchanged
-   (0<->3, 1<->2).*/
-static const unsigned char OD_MC_SIDXS_28[3][4][16]={
-  {
-    {
-      /*Corner: 0; split: none*/
-      0,0,0,0,0,0,0,0,
-      0,0,0,0,2,2,2,2
-    },{
-      /*Corner: 1; split: none*/
-      1,1,1,1,1,1,1,1,
-      3,3,3,3,1,1,1,1
-    },{
-      /*Corner: 2; split: none*/
-      0,0,0,0,2,2,2,2,
-      2,2,2,2,2,2,2,2
-    },{
-      /*Corner: 3; split: none*/
-      3,3,3,3,1,1,1,1,
-      3,3,3,3,3,3,3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 1*/
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,2,2,2,2
-    },{
-      /*Corner: 1; split: 2*/
-      1,1,1,1,1,1,1,1,
-      3,3,3,3,2,2,2,2
-    },{
-      /*Corner: 2; split: 3*/
-      0,0,0,0,2,2,2,2,
-      3,3,3,3,2,2,2,2
-    },{
-      /*Corner: 3; split: 0*/
-      0,0,0,0,1,1,1,1,
-      3,3,3,3,3,3,3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 3*/
-      0,0,0,0,0,0,0,0,
-      3,3,3,3,2,2,2,2
-    },{
-      /*Corner: 1; split: 0*/
-      0,0,0,0,1,1,1,1,
-      3,3,3,3,1,1,1,1
-    },{
-      /*Corner: 2; split: 1*/
-      0,0,0,0,1,1,1,1,
-      2,2,2,2,2,2,2,2
-    },{
-      /*Corner: 3; split: 2*/
-      3,3,3,3,1,1,1,1,
-      3,3,3,3,2,2,2,2
-    }
-  }
-};
-
-/*The MV from which to use the high-frequency coefficients for a 4x2 LL band.
-  Indexed by [split][corner][position], with positions in raster order.
-  The first index is 0 when neither adjacent vertex is split, 1 when vertex
-   (corner+1)&3 is split, and 2 when vertex (corner+3)&3 is split.
-  The entry for corner 3 with vertex 0 split is the entry for corner 0 with
-   vertex 3 split, flipped vertically, with the vertex labels exchanged
-   (0<->3, 1<->2).*/
-static const unsigned char OD_MC_SIDXS_42[3][4][8]={
-  {
-    {
-      /*Corner: 0; split: none*/
-      0,0,
-      0,0,
-      0,2,
-      0,2
-    },{
-      /*Corner: 1; split: none*/
-      1,1,
-      1,1,
-      3,1,
-      3,1
-    },{
-      /*Corner: 2; split: none*/
-      0,2,
-      0,2,
-      2,2,
-      2,2
-    },{
-      /*Corner: 3; split: none*/
-      3,1,
-      3,1,
-      3,3,
-      3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 1*/
-      0,1,
-      0,1,
-      0,2,
-      0,2
-    },{
-      /*Corner: 1; split: 2*/
-      1,1,
-      1,1,
-      3,2,
-      3,2
-    },{
-      /*Corner: 2; split: 3*/
-      0,2,
-      0,2,
-      3,2,
-      3,2
-    },{
-      /*Corner: 3; split: 0*/
-      0,1,
-      0,1,
-      3,3,
-      3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 3*/
-      0,0,
-      0,0,
-      3,2,
-      3,2
-    },{
-      /*Corner: 1; split: 0*/
-      0,1,
-      0,1,
-      3,1,
-      3,1
-    },{
-      /*Corner: 2; split: 1*/
-      0,1,
-      0,1,
-      2,2,
-      2,2
-    },{
-      /*Corner: 3; split: 2*/
-      3,1,
-      3,1,
-      3,2,
-      3,2
-    }
-  }
-};
-
-/*The MV from which to use the high-frequency coefficients for a 4x4 LL band.
-  Indexed by [split][corner][position], with positions in raster order.
-  The first index is 0 when neither adjacent vertex is split, 1 when vertex
-   (corner+1)&3 is split, and 2 when vertex (corner+3)&3 is split.
-  The entry for corner 3 with vertex 0 split is the entry for corner 0 with
-   vertex 3 split, flipped vertically, with the vertex labels exchanged
-   (0<->3, 1<->2).*/
-static const unsigned char OD_MC_SIDXS_44[3][4][16]={
-  {
-    {
-      /*Corner: 0; split: none*/
-      0,0,0,0,
-      0,0,0,0,
-      0,0,2,2,
-      0,0,2,2
-    },{
-      /*Corner: 1; split: none*/
-      1,1,1,1,
-      1,1,1,1,
-      3,3,1,1,
-      3,3,1,1
-    },{
-      /*Corner: 2; split: none*/
-      0,0,2,2,
-      0,0,2,2,
-      2,2,2,2,
-      2,2,2,2
-    },{
-      /*Corner: 3; split: none*/
-      3,3,1,1,
-      3,3,1,1,
-      3,3,3,3,
-      3,3,3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 1*/
-      0,0,1,1,
-      0,0,1,1,
-      0,0,2,2,
-      0,0,2,2
-    },{
-      /*Corner: 1; split: 2*/
-      1,1,1,1,
-      1,1,1,1,
-      3,3,2,2,
-      3,3,2,2
-    },{
-      /*Corner: 2; split: 3*/
-      0,0,2,2,
-      0,0,2,2,
-      3,3,2,2,
-      3,3,2,2
-    },{
-      /*Corner: 3; split: 0*/
-      0,0,1,1,
-      0,0,1,1,
-      3,3,3,3,
-      3,3,3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 3*/
-      0,0,0,0,
-      0,0,0,0,
-      3,3,2,2,
-      3,3,2,2
-    },{
-      /*Corner: 1; split: 0*/
-      0,0,1,1,
-      0,0,1,1,
-      3,3,1,1,
-      3,3,1,1
-    },{
-      /*Corner: 2; split: 1*/
-      0,0,1,1,
-      0,0,1,1,
-      2,2,2,2,
-      2,2,2,2
-    },{
-      /*Corner: 3; split: 2*/
-      3,3,1,1,
-      3,3,1,1,
-      3,3,2,2,
-      3,3,2,2
-    }
-  }
-};
-
-/*The MV from which to use the high-frequency coefficients for a 4x8 LL band.
-  Indexed by [split][corner][position], with positions in raster order.
-  The first index is 0 when neither adjacent vertex is split, 1 when vertex
-   (corner+1)&3 is split, and 2 when vertex (corner+3)&3 is split.
-  The entry for corner 3 with vertex 0 split is the entry for corner 0 with
-   vertex 3 split, flipped vertically, with the vertex labels exchanged
-   (0<->3, 1<->2).*/
-static const unsigned char OD_MC_SIDXS_48[3][4][32]={
-  {
-    {
-      /*Corner: 0; split: none*/
-      0,0,0,0,0,0,0,0,
-      0,0,0,0,0,0,0,2,
-      0,0,0,0,0,2,2,2,
-      0,0,0,2,2,2,2,2
-    },{
-      /*Corner: 1; split: none*/
-      1,1,1,1,1,1,1,1,
-      3,1,1,1,1,1,1,1,
-      3,3,3,1,1,1,1,1,
-      3,3,3,3,3,1,1,1
-    },{
-      /*Corner: 2; split: none*/
-      0,0,0,0,0,2,2,2,
-      0,0,0,2,2,2,2,2,
-      0,2,2,2,2,2,2,2,
-      2,2,2,2,2,2,2,2
-    },{
-      /*Corner: 3; split: none*/
-      3,3,3,1,1,1,1,1,
-      3,3,3,3,3,1,1,1,
-      3,3,3,3,3,3,3,1,
-      3,3,3,3,3,3,3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 1*/
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,0,1,1,1,
-      0,0,0,0,2,2,2,2,
-      0,0,0,2,2,2,2,2
-    },{
-      /*Corner: 1; split: 2*/
-      1,1,1,1,1,1,1,1,
-      3,1,1,1,1,1,1,1,
-      3,3,3,3,2,2,2,2,
-      3,3,3,3,2,2,2,2
-    },{
-      /*Corner: 2; split: 3*/
-      0,0,0,0,0,2,2,2,
-      0,0,0,0,2,2,2,2,
-      3,3,3,2,2,2,2,2,
-      3,3,3,3,2,2,2,2
-    },{
-      /*Corner: 3; split: 0*/
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,1,1,1,1,
-      3,3,3,3,3,3,3,1,
-      3,3,3,3,3,3,3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 3*/
-      0,0,0,0,0,0,0,0,
-      0,0,0,0,0,0,0,2,
-      3,3,3,3,2,2,2,2,
-      3,3,3,3,2,2,2,2
-    },{
-      /*Corner: 1; split: 0*/
-      0,0,0,0,1,1,1,1,
-      0,0,0,1,1,1,1,1,
-      3,3,3,3,1,1,1,1,
-      3,3,3,3,3,1,1,1
-    },{
-      /*Corner: 2; split: 1*/
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,1,1,1,1,
-      0,2,2,2,2,2,2,2,
-      2,2,2,2,2,2,2,2
-    },{
-      /*Corner: 3; split: 2*/
-      3,3,3,1,1,1,1,1,
-      3,3,3,3,1,1,1,1,
-      3,3,3,3,3,2,2,2,
-      3,3,3,3,2,2,2,2
-    }
-  }
-};
-
-/*The MV from which to use the high-frequency coefficients for an 8x2 LL band.
-  Indexed by [split][corner][position], with positions in raster order.
-  The first index is 0 when neither adjacent vertex is split, 1 when vertex
-   (corner+1)&3 is split, and 2 when vertex (corner+3)&3 is split.
-  The entry for corner 3 with vertex 0 split is the entry for corner 0 with
-   vertex 3 split, flipped vertically, with the vertex labels exchanged
-   (0<->3, 1<->2).*/
-static const unsigned char OD_MC_SIDXS_82[3][4][16]={
-  {
-    {
-      /*Corner: 0; split: none*/
-      0,0,
-      0,0,
-      0,0,
-      0,0,
-      0,2,
-      0,2,
-      0,2,
-      0,2
-    },{
-      /*Corner: 1; split: none*/
-      1,1,
-      1,1,
-      1,1,
-      1,1,
-      3,1,
-      3,1,
-      3,1,
-      3,1
-    },{
-      /*Corner: 2; split: none*/
-      0,2,
-      0,2,
-      0,2,
-      0,2,
-      2,2,
-      2,2,
-      2,2,
-      2,2
-    },{
-      /*Corner: 3; split: none*/
-      3,1,
-      3,1,
-      3,1,
-      3,1,
-      3,3,
-      3,3,
-      3,3,
-      3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 1*/
-      0,1,
-      0,1,
-      0,1,
-      0,1,
-      0,2,
-      0,2,
-      0,2,
-      0,2
-    },{
-      /*Corner: 1; split: 2*/
-      1,1,
-      1,1,
-      1,1,
-      1,1,
-      3,2,
-      3,2,
-      3,2,
-      3,2
-    },{
-      /*Corner: 2; split: 3*/
-      0,2,
-      0,2,
-      0,2,
-      0,2,
-      3,2,
-      3,2,
-      3,2,
-      3,2
-    },{
-      /*Corner: 3; split: 0*/
-      0,1,
-      0,1,
-      0,1,
-      0,1,
-      3,3,
-      3,3,
-      3,3,
-      3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 3*/
-      0,0,
-      0,0,
-      0,0,
-      0,0,
-      3,2,
-      3,2,
-      3,2,
-      3,2
-    },{
-      /*Corner: 1; split: 0*/
-      0,1,
-      0,1,
-      0,1,
-      0,1,
-      3,1,
-      3,1,
-      3,1,
-      3,1
-    },{
-      /*Corner: 2; split: 1*/
-      /*NOTE(review): this entry has five 0,1 rows, while its vertical mirror
-         (corner 1, split 2) switches after four; confirm this is intended.*/
-      0,1,
-      0,1,
-      0,1,
-      0,1,
-      0,1,
-      2,2,
-      2,2,
-      2,2
-    },{
-      /*Corner: 3; split: 2*/
-      3,1,
-      3,1,
-      3,1,
-      3,1,
-      3,2,
-      3,2,
-      3,2,
-      3,2
-    }
-  }
-};
-
-/*The MV from which to use the high-frequency coefficients for an 8x4 LL band.
-  Indexed by [split][corner][position], with positions in raster order.
-  The first index is 0 when neither adjacent vertex is split, 1 when vertex
-   (corner+1)&3 is split, and 2 when vertex (corner+3)&3 is split.
-  The entry for corner 3 with vertex 0 split is the entry for corner 0 with
-   vertex 3 split, flipped vertically, with the vertex labels exchanged
-   (0<->3, 1<->2).*/
-static const unsigned char OD_MC_SIDXS_84[3][4][32]={
-  {
-    {
-      /*Corner: 0; split: none*/
-      0,0,0,0,
-      0,0,0,0,
-      0,0,0,0,
-      0,0,0,2,
-      0,0,0,2,
-      0,0,2,2,
-      0,0,2,2,
-      0,2,2,2
-    },{
-      /*Corner: 1; split: none*/
-      1,1,1,1,
-      1,1,1,1,
-      1,1,1,1,
-      3,1,1,1,
-      3,1,1,1,
-      3,3,1,1,
-      3,3,1,1,
-      3,3,3,1
-    },{
-      /*Corner: 2; split: none*/
-      0,0,0,2,
-      0,0,2,2,
-      0,0,2,2,
-      0,2,2,2,
-      0,2,2,2,
-      2,2,2,2,
-      2,2,2,2,
-      2,2,2,2
-    },{
-      /*Corner: 3; split: none*/
-      3,1,1,1,
-      3,3,1,1,
-      3,3,1,1,
-      3,3,3,1,
-      3,3,3,1,
-      3,3,3,3,
-      3,3,3,3,
-      3,3,3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 1*/
-      0,0,1,1,
-      0,0,1,1,
-      0,0,1,1,
-      0,0,1,1,
-      0,0,2,2,
-      0,0,2,2,
-      0,0,2,2,
-      0,2,2,2
-    },{
-      /*Corner: 1; split: 2*/
-      1,1,1,1,
-      1,1,1,1,
-      1,1,1,1,
-      3,1,1,1,
-      3,3,1,2,
-      3,3,2,2,
-      3,3,2,2,
-      3,3,2,2
-    },{
-      /*Corner: 2; split: 3*/
-      0,0,0,2,
-      0,0,2,2,
-      0,0,2,2,
-      0,0,2,2,
-      3,3,2,2,
-      3,3,2,2,
-      3,3,2,2,
-      3,3,2,2
-    },{
-      /*Corner: 3; split: 0*/
-      0,0,1,1,
-      0,0,1,1,
-      0,0,1,1,
-      0,3,1,1,
-      3,3,3,1,
-      3,3,3,3,
-      3,3,3,3,
-      3,3,3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 3*/
-      0,0,0,0,
-      0,0,0,0,
-      0,0,0,0,
-      0,0,0,2,
-      3,0,2,2,
-      3,3,2,2,
-      3,3,2,2,
-      3,3,2,2
-    },{
-      /*Corner: 1; split: 0*/
-      0,0,1,1,
-      0,0,1,1,
-      0,0,1,1,
-      0,0,1,1,
-      3,3,1,1,
-      3,3,1,1,
-      3,3,1,1,
-      3,3,3,1
-    },{
-      /*Corner: 2; split: 1*/
-      0,0,1,1,
-      0,0,1,1,
-      0,0,1,1,
-      0,0,2,1,
-      0,2,2,2,
-      2,2,2,2,
-      2,2,2,2,
-      2,2,2,2
-    },{
-      /*Corner: 3; split: 2*/
-      3,1,1,1,
-      3,3,1,1,
-      3,3,1,1,
-      3,3,1,1,
-      3,3,2,2,
-      3,3,2,2,
-      3,3,2,2,
-      3,3,2,2
-    }
-  }
-};
-
-/*The MV from which to use the high-frequency coefficients for an 8x8 LL band.
-  Indexed by [split][corner][position], with positions in raster order.
-  The first index is 0 when neither adjacent vertex is split, 1 when vertex
-   (corner+1)&3 is split, and 2 when vertex (corner+3)&3 is split.
-  The entry for corner 3 with vertex 0 split is the entry for corner 0 with
-   vertex 3 split, flipped vertically, with the vertex labels exchanged
-   (0<->3, 1<->2).*/
-static const unsigned char OD_MC_SIDXS_88[3][4][64]={
-  {
-    {
-      /*Corner: 0; split: none*/
-      0,0,0,0,0,0,0,0,
-      0,0,0,0,0,0,0,0,
-      0,0,0,0,0,0,0,0,
-      0,0,0,0,0,0,0,2,
-      0,0,0,0,0,2,2,2,
-      0,0,0,0,2,2,2,2,
-      0,0,0,0,2,2,2,2,
-      0,0,0,2,2,2,2,2
-    },{
-      /*Corner: 1; split: none*/
-      1,1,1,1,1,1,1,1,
-      1,1,1,1,1,1,1,1,
-      1,1,1,1,1,1,1,1,
-      3,1,1,1,1,1,1,1,
-      3,3,3,1,1,1,1,1,
-      3,3,3,3,1,1,1,1,
-      3,3,3,3,1,1,1,1,
-      3,3,3,3,3,1,1,1
-    },{
-      /*Corner: 2; split: none*/
-      0,0,0,0,0,2,2,2,
-      0,0,0,0,2,2,2,2,
-      0,0,0,0,2,2,2,2,
-      0,0,0,2,2,2,2,2,
-      0,2,2,2,2,2,2,2,
-      2,2,2,2,2,2,2,2,
-      2,2,2,2,2,2,2,2,
-      2,2,2,2,2,2,2,2
-    },{
-      /*Corner: 3; split: none*/
-      3,3,3,1,1,1,1,1,
-      3,3,3,3,1,1,1,1,
-      3,3,3,3,1,1,1,1,
-      3,3,3,3,3,1,1,1,
-      3,3,3,3,3,3,3,1,
-      3,3,3,3,3,3,3,3,
-      3,3,3,3,3,3,3,3,
-      3,3,3,3,3,3,3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 1*/
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,0,1,1,1,
-      0,0,0,0,2,2,2,2,
-      0,0,0,0,2,2,2,2,
-      0,0,0,2,2,2,2,2,
-      0,0,0,2,2,2,2,2
-    },{
-      /*Corner: 1; split: 2*/
-      1,1,1,1,1,1,1,1,
-      1,1,1,1,1,1,1,1,
-      1,1,1,1,1,1,1,1,
-      3,3,1,1,1,1,1,1,
-      3,3,3,3,1,2,2,2,
-      3,3,3,3,2,2,2,2,
-      3,3,3,3,2,2,2,2,
-      3,3,3,3,2,2,2,2
-    },{
-      /*Corner: 2; split: 3*/
-      0,0,0,0,0,2,2,2,
-      0,0,0,0,0,2,2,2,
-      0,0,0,0,2,2,2,2,
-      0,0,0,0,2,2,2,2,
-      3,3,3,2,2,2,2,2,
-      3,3,3,3,2,2,2,2,
-      3,3,3,3,2,2,2,2,
-      3,3,3,3,2,2,2,2
-    },{
-      /*Corner: 3; split: 0*/
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,1,1,1,1,
-      0,0,0,3,1,1,1,1,
-      3,3,3,3,3,3,1,1,
-      3,3,3,3,3,3,3,3,
-      3,3,3,3,3,3,3,3,
-      3,3,3,3,3,3,3,3
-    }
-  },{
-    {
-      /*Corner: 0; split: 3*/
-      0,0,0,0,0,0,0,0,
-      0,0,0,0,0,0,0,0,
-      0,0,0,0,0,0,0,0,
-      0,0,0,0,0,0,2,2,
-      3,3,3,0,2,2,2,2,
-      3,3,3,3,2,2,2,2,
-      3,3,3,3,2,2,2,2,
-      3,3,3,3,2,2,2,2
-    },{
-      /*Corner: 1; split: 0*/
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,1,1,1,1,
-      0,0,0,1,1,1,1,1,
-      3,3,3,3,1,1,1,1,
-      3,3,3,3,1,1,1,1,
-      3,3,3,3,3,1,1,1,
-      3,3,3,3,3,1,1,1
-    },{
-      /*Corner: 2; split: 1*/
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,1,1,1,1,
-      0,0,0,0,2,1,1,1,
-      0,0,2,2,2,2,2,2,
-      2,2,2,2,2,2,2,2,
-      2,2,2,2,2,2,2,2,
-      2,2,2,2,2,2,2,2
-    },{
-      /*Corner: 3; split: 2*/
-      3,3,3,1,1,1,1,1,
-      3,3,3,1,1,1,1,1,
-      3,3,3,3,1,1,1,1,
-      3,3,3,3,1,1,1,1,
-      3,3,3,3,3,2,2,2,
-      3,3,3,3,2,2,2,2,
-      3,3,3,3,2,2,2,2,
-      3,3,3,3,2,2,2,2
-    }
-  }
-};
-
-/*The MV from which to use the high-frequency coefficients, indexed by:
-   [log_yblk_sz-2][log_xblk_sz-2][!!s3<<1|!!s1][c][j<<log_xblk_sz-1|i].
-  Each pointer refers to one [split][corner] entry of the OD_MC_SIDXS_RC table
-   for an LL band with R rows and C columns; the first index selects R (2, 4,
-   or 8) and the second selects C.
-  The third dimension only has 3 entries: there is no entry for the index 3
-   that results when both s1 and s3 are set, so callers must not look that
-   case up here.*/
-static const unsigned char *OD_MC_SIDXS[3][3][3][4]={
-  {
-    {
-      {
-        OD_MC_SIDXS_22[0][0],OD_MC_SIDXS_22[0][1],
-        OD_MC_SIDXS_22[0][2],OD_MC_SIDXS_22[0][3]
-      },{
-        OD_MC_SIDXS_22[1][0],OD_MC_SIDXS_22[1][1],
-        OD_MC_SIDXS_22[1][2],OD_MC_SIDXS_22[1][3]
-      },{
-        OD_MC_SIDXS_22[2][0],OD_MC_SIDXS_22[2][1],
-        OD_MC_SIDXS_22[2][2],OD_MC_SIDXS_22[2][3]
-      }
-    },{
-      {
-        OD_MC_SIDXS_24[0][0],OD_MC_SIDXS_24[0][1],
-        OD_MC_SIDXS_24[0][2],OD_MC_SIDXS_24[0][3]
-      },{
-        OD_MC_SIDXS_24[1][0],OD_MC_SIDXS_24[1][1],
-        OD_MC_SIDXS_24[1][2],OD_MC_SIDXS_24[1][3]
-      },{
-        OD_MC_SIDXS_24[2][0],OD_MC_SIDXS_24[2][1],
-        OD_MC_SIDXS_24[2][2],OD_MC_SIDXS_24[2][3]
-      }
-    },{
-      {
-        OD_MC_SIDXS_28[0][0],OD_MC_SIDXS_28[0][1],
-        OD_MC_SIDXS_28[0][2],OD_MC_SIDXS_28[0][3]
-      },{
-        OD_MC_SIDXS_28[1][0],OD_MC_SIDXS_28[1][1],
-        OD_MC_SIDXS_28[1][2],OD_MC_SIDXS_28[1][3]
-      },{
-        OD_MC_SIDXS_28[2][0],OD_MC_SIDXS_28[2][1],
-        OD_MC_SIDXS_28[2][2],OD_MC_SIDXS_28[2][3]
-      }
-    }
-  },{
-    {
-      {
-        OD_MC_SIDXS_42[0][0],OD_MC_SIDXS_42[0][1],
-        OD_MC_SIDXS_42[0][2],OD_MC_SIDXS_42[0][3]
-      },{
-        OD_MC_SIDXS_42[1][0],OD_MC_SIDXS_42[1][1],
-        OD_MC_SIDXS_42[1][2],OD_MC_SIDXS_42[1][3]
-      },{
-        OD_MC_SIDXS_42[2][0],OD_MC_SIDXS_42[2][1],
-        OD_MC_SIDXS_42[2][2],OD_MC_SIDXS_42[2][3]
-      }
-    },{
-      {
-        OD_MC_SIDXS_44[0][0],OD_MC_SIDXS_44[0][1],
-        OD_MC_SIDXS_44[0][2],OD_MC_SIDXS_44[0][3]
-      },{
-        OD_MC_SIDXS_44[1][0],OD_MC_SIDXS_44[1][1],
-        OD_MC_SIDXS_44[1][2],OD_MC_SIDXS_44[1][3]
-      },{
-        OD_MC_SIDXS_44[2][0],OD_MC_SIDXS_44[2][1],
-        OD_MC_SIDXS_44[2][2],OD_MC_SIDXS_44[2][3]
-      }
-    },{
-      {
-        OD_MC_SIDXS_48[0][0],OD_MC_SIDXS_48[0][1],
-        OD_MC_SIDXS_48[0][2],OD_MC_SIDXS_48[0][3]
-      },{
-        OD_MC_SIDXS_48[1][0],OD_MC_SIDXS_48[1][1],
-        OD_MC_SIDXS_48[1][2],OD_MC_SIDXS_48[1][3]
-      },{
-        OD_MC_SIDXS_48[2][0],OD_MC_SIDXS_48[2][1],
-        OD_MC_SIDXS_48[2][2],OD_MC_SIDXS_48[2][3]
-      }
-    }
-  },{
-    {
-      {
-        OD_MC_SIDXS_82[0][0],OD_MC_SIDXS_82[0][1],
-        OD_MC_SIDXS_82[0][2],OD_MC_SIDXS_82[0][3]
-      },{
-        OD_MC_SIDXS_82[1][0],OD_MC_SIDXS_82[1][1],
-        OD_MC_SIDXS_82[1][2],OD_MC_SIDXS_82[1][3]
-      },{
-        OD_MC_SIDXS_82[2][0],OD_MC_SIDXS_82[2][1],
-        OD_MC_SIDXS_82[2][2],OD_MC_SIDXS_82[2][3]
-      }
-    },{
-      {
-        OD_MC_SIDXS_84[0][0],OD_MC_SIDXS_84[0][1],
-        OD_MC_SIDXS_84[0][2],OD_MC_SIDXS_84[0][3]
-      },{
-        OD_MC_SIDXS_84[1][0],OD_MC_SIDXS_84[1][1],
-        OD_MC_SIDXS_84[1][2],OD_MC_SIDXS_84[1][3]
-      },{
-        OD_MC_SIDXS_84[2][0],OD_MC_SIDXS_84[2][1],
-        OD_MC_SIDXS_84[2][2],OD_MC_SIDXS_84[2][3]
-      }
-    },{
-      {
-        OD_MC_SIDXS_88[0][0],OD_MC_SIDXS_88[0][1],
-        OD_MC_SIDXS_88[0][2],OD_MC_SIDXS_88[0][3]
-      },{
-        OD_MC_SIDXS_88[1][0],OD_MC_SIDXS_88[1][1],
-        OD_MC_SIDXS_88[1][2],OD_MC_SIDXS_88[1][3]
-      },{
-        OD_MC_SIDXS_88[2][0],OD_MC_SIDXS_88[2][1],
-        OD_MC_SIDXS_88[2][2],OD_MC_SIDXS_88[2][3]
-      }
-    }
-  }
-};
-
-/*Perform multiresolution blending with bilinear weights modified for unsplit
-   edges.
-  The block is processed in 2x2 groups of samples.
-  For each group, the sum of the four samples (the LL coefficient) is blended
-   across all four predictors, while the remaining coefficients are taken
-   from the single predictor selected by the OD_MC_SIDXS lookup table.
-  _dst:         The destination block.
-  _dystride:    The distance in bytes between rows of _dst.
-  _src:         The four predictors to blend.
-  _systride:    The distance in bytes between rows of each predictor.
-  _c:           Passed on to od_mc_setup_s_split(), and used as a table index.
-  _s1:          Passed on to od_mc_setup_s_split(), and used as a table index.
-  _s3:          Passed on to od_mc_setup_s_split(), and used as a table index.
-  _log_xblk_sz: The log base 2 of the block width.
-                The table lookup requires this to be 2, 3, or 4.
-  _log_yblk_sz: The log base 2 of the block height.
-                The table lookup requires this to be 2, 3, or 4.
-  The table has no entry for _s1 and _s3 both being non-zero.*/
-static void od_mc_blend_multi_split8(unsigned char *_dst,int _dystride,
- const unsigned char *_src[4],int _systride,int _c,int _s1,int _s3,
- int _log_xblk_sz,int _log_yblk_sz){
-  const unsigned char *p;
-  const unsigned char *sidx0;
-  const unsigned char *sidx;
-  unsigned char       *dst0;
-  unsigned char       *dst;
-  ptrdiff_t            o;
-  ptrdiff_t            o0;
-  int                  ll[4];
-  int                  lh;
-  int                  hl;
-  int                  hh;
-  int                  a;
-  int                  b;
-  int                  c;
-  int                  d;
-  int                  s[4];
-  int                  s0[4];
-  int                  dsdi[4];
-  int                  dsdj[4];
-  int                  ddsdidj[4];
-  int                  xblk_sz;
-  int                  yblk_sz;
-  int                  log_blk_sz2p1;
-  int                  i;
-  int                  j;
-  int                  k;
-  /*Perform multiresolution blending.*/
-  xblk_sz=1<<_log_xblk_sz;
-  yblk_sz=1<<_log_yblk_sz;
-  o0=0;
-  dst0=_dst;
-  log_blk_sz2p1=_log_xblk_sz+_log_yblk_sz+1;
-  /*The weights are set up for the LL band, which has half the resolution of
-     the block in each direction.*/
-  od_mc_setup_s_split(s0,dsdi,dsdj,ddsdidj,_c,_s1,_s3,
-   _log_xblk_sz-1,_log_yblk_sz-1);
-  sidx0=OD_MC_SIDXS[_log_yblk_sz-2][_log_xblk_sz-2][!!_s3<<1|!!_s1][_c];
-  for(k=0;k<4;k++)s[k]=s0[k];
-  for(j=1;j<yblk_sz;j+=2){
-    o=o0;
-    dst=dst0;
-    sidx=sidx0;
-    /*Process every 2x2 group in this pair of rows; the table lookup takes
-       the place of separate per-quadrant loops.*/
-    for(i=1;i<xblk_sz;i+=2){
-      /*k is the predictor that supplies the high-frequency coefficients.*/
-      k=*sidx++;
-      p=_src[k]+o;
-      /*Forward Haar wavelet.*/
-      ll[k]=p[0]+p[1];
-      lh=p[0]-p[1];
-      hl=(p+_systride)[0]+(p+_systride)[1];
-      hh=(p+_systride)[0]-(p+_systride)[1];
-      c=ll[k]-hl;
-      ll[k]+=hl;
-      hl=c;
-      /*No need to finish the transform; we'd just invert it later.*/
-      /*Only the LL coefficient is needed from the other three predictors.*/
-      p=_src[k+1&3]+o;
-      ll[k+1&3]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      p=_src[k+2&3]+o;
-      ll[k+2&3]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      p=_src[k+3&3]+o;
-      ll[k+3&3]=p[0]+p[1]+(p+_systride)[0]+(p+_systride)[1];
-      /*LL blending.*/
-      a=(int)OD_DIV_POW2_RE((ogg_int32_t)ll[0]*s[0]+
-       (ogg_int32_t)ll[1]*s[1]+(ogg_int32_t)ll[2]*s[2]+
-       (ogg_int32_t)ll[3]*s[3],log_blk_sz2p1);
-      /*Inverse Haar wavelet.*/
-      c=OD_DIV2_RE(a-hl);
-      a=OD_DIV2_RE(a+hl);
-      d=OD_DIV2_RE(c-hh);
-      c=OD_DIV2_RE(c+hh);
-      b=OD_DIV2_RE(a-lh);
-      a=OD_DIV2_RE(a+lh);
-      dst[0]=od_clamp255(a);
-      dst[1]=od_clamp255(b);
-      (dst+_dystride)[0]=od_clamp255(c);
-      (dst+_dystride)[1]=od_clamp255(d);
-      o+=2;
-      dst+=2;
-      /*k is reused as a loop counter here; the table index is no longer
-         needed.*/
-      for(k=0;k<4;k++)s[k]+=dsdi[k];
-    }
-    o0+=_systride<<1;
-    dst0+=_dystride<<1;
-    /*Each row of the lookup table holds one entry per 2x2 group.*/
-    sidx0+=xblk_sz>>1;
-    for(k=0;k<4;k++){
-      s0[k]+=dsdj[k];
-      s[k]=s0[k];
-      dsdi[k]+=ddsdidj[k];
-    }
-  }
-}
-
-/*Blends four predictors into _dst, dispatching to one of four blending
-   routines.
-  The multiresolution routines are used when the block is at least 4 samples
-   in each direction, and the normal ones otherwise.
-  The "split" variants are used unless both _s1 and _s3 are non-zero.*/
-static void od_mc_blend8(unsigned char *_dst,int _dystride,
- const unsigned char *_src[4],int _systride,int _c,int _s1,int _s3,
- int _log_xblk_sz,int _log_yblk_sz){
-  int use_multi;
-  int both_split;
-  use_multi=_log_xblk_sz>=2&&_log_yblk_sz>=2;
-  both_split=_s1&&_s3;
-  if(use_multi&&both_split){
-    /*Multiresolution blending with unmodified weights.*/
-    od_mc_blend_multi8(_dst,_dystride,_src,_systride,
-     _log_xblk_sz,_log_yblk_sz);
-    return;
-  }
-  if(use_multi){
-    /*Multiresolution blending with weights modified for unsplit edges.*/
-    od_mc_blend_multi_split8(_dst,_dystride,_src,_systride,
-     _c,_s1,_s3,_log_xblk_sz,_log_yblk_sz);
-    return;
-  }
-  /*The block is too small for multiresolution blending.*/
-  if(both_split){
-    od_mc_blend_full8(_dst,_dystride,_src,_systride,
-     _log_xblk_sz,_log_yblk_sz);
-    return;
-  }
-  od_mc_blend_full_split8(_dst,_dystride,_src,_systride,_c,_s1,_s3,
-   _log_xblk_sz,_log_yblk_sz);
-}
-
-/*Forms the prediction for one block from up to four motion vectors.
-  Depending on _interp_type, the block is predicted directly into _dst, or up
-   to four intermediate predictors are built in a local buffer and combined
-   with od_mc_blend8().
-  Several interpolation types share one case body: r counts the number of case
-   labels fallen through before reaching that body, and is used to rotate the
-   vertex indices, e.g. pred[k+r&3].
-  _dst:         The destination block.
-  _dystride:    The distance in bytes between rows of _dst.
-  _src:         The reference data, passed on to the prediction helpers.
-  _systride:    The stride of _src, passed on to the prediction helpers.
-  _mvx:         The four motion vector x components.
-  _mvy:         The four motion vector y components.
-  _interp_type: One of the OD_MC_INTERP_* constants handled below.
-                Any other value leaves _dst untouched.
-  _c,_s1,_s3:   Passed on to od_mc_blend8(), after forcing _s1 and/or _s3 on
-                 for some combinations of _c and r.
-  _log_xblk_sz: The log base 2 of the block width.
-  _log_yblk_sz: The log base 2 of the block height.
-  NOTE(review): The local buffer holds 16x16 predictors, so this presumably
-   requires both log sizes to be at most 4 -- confirm against the callers.*/
-void od_mc_predict8(unsigned char *_dst,int _dystride,
- const unsigned char *_src,int _systride,const ogg_int32_t _mvx[4],
- const ogg_int32_t _mvy[4],int _interp_type,int _c,int _s1,int _s3,
- int _log_xblk_sz,int _log_yblk_sz){
-  const unsigned char *pred[4];
-  /*Four intermediate predictors; sizeof(buf[0][0]) is the stride of each.*/
-  unsigned char        buf[4][16][16];
-  int                  r;
-  r=0;
-  switch(_interp_type){
-    case OD_MC_INTERP_VVVV:{
-      /*A single predictor is written directly; no blending is needed.*/
-      od_mc_predict1imv8(_dst,_dystride,_src,_systride,_mvx,_mvy,
-       MIDXS[0]/*0,1,2,3*/,0,_log_xblk_sz,_log_yblk_sz);
-    }break;
-    /*Fall through, counting the rotation in r.*/
-    case OD_MC_INTERP_VVVB:r++;
-    case OD_MC_INTERP_VVBV:r++;
-    case OD_MC_INTERP_VBVV:r++;
-    case OD_MC_INTERP_BVVV:{
-      od_mc_predict1imv8(buf[0][0],sizeof(buf[0][0]),_src,_systride,_mvx,_mvy,
-       MIDXS[1]/*0,0,2,3*/,r,_log_xblk_sz,_log_yblk_sz);
-      od_mc_predict1imv8(buf[1][0],sizeof(buf[1][0]),_src,_systride,_mvx,_mvy,
-       MIDXS[2]/*1,1,2,3*/,r,_log_xblk_sz,_log_yblk_sz);
-      od_mc_predict1imv8(buf[2][0],sizeof(buf[2][0]),_src,_systride,_mvx,_mvy,
-       MIDXS[0]/*0,1,2,3*/,r,_log_xblk_sz,_log_yblk_sz);
-      /*The last two vertices share the third predictor.*/
-      pred[0+r&3]=buf[0][0];
-      pred[1+r&3]=buf[1][0];
-      pred[2+r&3]=buf[2][0];
-      pred[3+r&3]=buf[2][0];
-      od_mc_blend8(_dst,_dystride,pred,sizeof(buf[0][0]),
-       _c,_s1||(_c-r&3)!=0,_s3||(_c-r&3)!=1,_log_xblk_sz,_log_yblk_sz);
-    }break;
-    /*Fall through, counting the rotation in r.*/
-    case OD_MC_INTERP_VBVB:r++;
-    case OD_MC_INTERP_BVBV:{
-      od_mc_predict1imv8(buf[0][0],sizeof(buf[0][0]),_src,_systride,_mvx,_mvy,
-       MIDXS[3]/*0,0,3,3*/,r,_log_xblk_sz,_log_yblk_sz);
-      od_mc_predict1imv8(buf[1][0],sizeof(buf[1][0]),_src,_systride,_mvx,_mvy,
-       MIDXS[4]/*1,1,2,2*/,r,_log_xblk_sz,_log_yblk_sz);
-      /*Only two predictors are needed; each is shared by two vertices.*/
-      pred[0+r&3]=buf[0][0];
-      pred[1+r&3]=buf[1][0];
-      pred[2+r&3]=buf[1][0];
-      pred[3+r&3]=buf[0][0];
-      od_mc_blend8(_dst,_dystride,pred,sizeof(buf[0][0]),
-       _c,_s1||(_c-r&1),_s3||!(_c-r&1),_log_xblk_sz,_log_yblk_sz);
-    }break;
-    /*Fall through, counting the rotation in r.*/
-    case OD_MC_INTERP_VBBV:r++;
-    case OD_MC_INTERP_BBVV:r++;
-    case OD_MC_INTERP_BVVB:r++;
-    case OD_MC_INTERP_VVBB:{
-      od_mc_predict1imv8(buf[0][0],sizeof(buf[0][0]),_src,_systride,_mvx,_mvy,
-       MIDXS[5]/*0,1,0,0*/,r,_log_xblk_sz,_log_yblk_sz);
-      od_mc_predict1imv8(buf[1][0],sizeof(buf[1][0]),_src,_systride,_mvx,_mvy,
-       MIDXS[0]/*0,1,2,3*/,r,_log_xblk_sz,_log_yblk_sz);
-      od_mc_predict1imv8(buf[2][0],sizeof(buf[2][0]),_src,_systride,_mvx,_mvy,
-       MIDXS[6]/*2,1,2,2*/,r,_log_xblk_sz,_log_yblk_sz);
-      /*The last predictor uses the single MV of vertex 3+r&3.*/
-      od_mc_predict1fmv8(buf[3][0],sizeof(buf[3][0]),_src,_systride,
-       _mvx[3+r&3],_mvy[3+r&3],_log_xblk_sz,_log_yblk_sz);
-      pred[0+r&3]=buf[0][0];
-      pred[1+r&3]=buf[1][0];
-      pred[2+r&3]=buf[2][0];
-      pred[3+r&3]=buf[3][0];
-      od_mc_blend8(_dst,_dystride,pred,sizeof(buf[0][0]),
-       _c,_s1||(_c+2-r&2),_s3||(_c+1-r&2),_log_xblk_sz,_log_yblk_sz);
-    }break;
-    /*Fall through, counting the rotation in r.*/
-    case OD_MC_INTERP_BBBV:r++;
-    case OD_MC_INTERP_BBVB:r++;
-    case OD_MC_INTERP_BVBB:r++;
-    case OD_MC_INTERP_VBBB:{
-      od_mc_predict1imv8(buf[0][0],sizeof(buf[0][0]),_src,_systride,_mvx,_mvy,
-       MIDXS[5]/*0,1,0,0*/,r,_log_xblk_sz,_log_yblk_sz);
-      od_mc_predict1imv8(buf[1][0],sizeof(buf[1][0]),_src,_systride,_mvx,_mvy,
-       MIDXS[7]/*0,1,1,1*/,r,_log_xblk_sz,_log_yblk_sz);
-      /*The last two predictors each use the single MV of their own vertex.*/
-      od_mc_predict1fmv8(buf[2][0],sizeof(buf[2][0]),_src,_systride,
-       _mvx[2+r&3],_mvy[2+r&3],_log_xblk_sz,_log_yblk_sz);
-      od_mc_predict1fmv8(buf[3][0],sizeof(buf[3][0]),_src,_systride,
-       _mvx[3+r&3],_mvy[3+r&3],_log_xblk_sz,_log_yblk_sz);
-      pred[0+r&3]=buf[0][0];
-      pred[1+r&3]=buf[1][0];
-      pred[2+r&3]=buf[2][0];
-      pred[3+r&3]=buf[3][0];
-      od_mc_blend8(_dst,_dystride,pred,sizeof(buf[0][0]),
-       _c,_s1||(_c-r&3)==0,_s3||(_c-r&3)==1,_log_xblk_sz,_log_yblk_sz);
-    }break;
-    case OD_MC_INTERP_BBBB:{
-      /*Every vertex gets its own single-MV predictor, and _s1 and _s3 are
-         passed through unchanged.*/
-      od_mc_predict1fmv8(buf[0][0],sizeof(buf[0][0]),_src,_systride,
-       _mvx[0],_mvy[0],_log_xblk_sz,_log_yblk_sz);
-      od_mc_predict1fmv8(buf[1][0],sizeof(buf[1][0]),_src,_systride,
-       _mvx[1],_mvy[1],_log_xblk_sz,_log_yblk_sz);
-      od_mc_predict1fmv8(buf[2][0],sizeof(buf[2][0]),_src,_systride,
-       _mvx[2],_mvy[2],_log_xblk_sz,_log_yblk_sz);
-      od_mc_predict1fmv8(buf[3][0],sizeof(buf[3][0]),_src,_systride,
-       _mvx[3],_mvy[3],_log_xblk_sz,_log_yblk_sz);
-      pred[0]=buf[0][0];
-      pred[1]=buf[1][0];
-      pred[2]=buf[2][0];
-      pred[3]=buf[3][0];
-      od_mc_blend8(_dst,_dystride,pred,sizeof(buf[0][0]),
-       _c,_s1,_s3,_log_xblk_sz,_log_yblk_sz);
-    }
-  }
-}
-
-#if 0
-#include <stdio.h>
-
-static unsigned char mask[4][4]={
-  {0, 8, 2,10},
-  {12,4,14, 6},
-  {3, 11,1, 9},
-  {15,7,13, 5}
-};
-
-static unsigned char img[16*7][16*7];
-
-static ogg_int32_t mvs[4][4][2];
-static ogg_int32_t mvs2[4][4][2];
-
-static int edge_types[4][4]={
-  {0x0,0x1,0x2,0x8},
-  {0x4,0x6,0xA,0xC},
-  {0x5,0x7,0xE,0xD},
-  {0x3,0xF,0xB,0x9}
-};
-
-static int edge_types2[8][8]={
-  {0x0,0x4,0x1,0x5,0x0,0x2,0xA,0x8},
-  {0x2,0x9,0x0,0x1,0x2,0xA,0x8,0x0},
-  {0x6,0x8,0x2,0xA,0xC,0x6,0xC,0x4},
-  {0x5,0x4,0x6,0xE,0x9,0x3,0xF,0xD},
-  {0x1,0x5,0x3,0xF,0xA,0xE,0xB,0x9},
-  {0x6,0xD,0x4,0x7,0xE,0xF,0xE,0xC},
-  {0x5,0x7,0xB,0xF,0xD,0x7,0xB,0xD},
-  {0x3,0xB,0xC,0x7,0x9,0x3,0x8,0x1}
-};
-/*Fills the mvs and mvs2 test grids with deterministic motion vectors.
-  Each component takes one mask entry with 8 subtracted, shifted left by
-   _log_blk_sz+15, and XORs it with another mask entry shifted left by
-   _log_blk_sz+12.
-  mvs2 uses the same construction as mvs with different mask offsets.
-  NOTE(review): mask[..]-8 is negative for entries below 8, and left-shifting
-   a negative value is undefined in ISO C; confirm this is acceptable for the
-   compilers this test targets.
-  _log_blk_sz: The base-2 logarithm of the block size under test.*/
-static void fill_mvs(int _log_blk_sz){
-  int i;
-  int j;
-  for(j=0;j<4;j++){
-    for(i=0;i<4;i++){
-      mvs[j][i][0]=(mask[j+0&3][i+0&3]-8)<<_log_blk_sz+15^
-       mask[j+2&3][i+2&3]<<_log_blk_sz+12;
-      mvs[j][i][1]=(mask[j+1&3][i+1&3]-8)<<_log_blk_sz+15^
-       mask[j+3&3][i+3&3]<<_log_blk_sz+12;
-      mvs2[j][i][0]=(mask[j+3&3][i+3&3]-8)<<_log_blk_sz+15^
-       mask[j+1&3][i+1&3]<<_log_blk_sz+12;
-      mvs2[j][i][1]=(mask[j+2&3][i+2&3]-8)<<_log_blk_sz+15^
-       mask[j+0&3][i+0&3]<<_log_blk_sz+12;
-    }
-  }
-}
-/*Paints the test image and dumps part of it to stdout.
-  img is filled as a 7x7 grid of constant-valued square blocks, each
-   1<<_log_blk_sz samples on a side, with the value (mask[j&3][i&3]<<4)|15.
-  The upper-left 6x6 blocks are then printed in hex, one image row per line.
-  _log_blk_sz: The base-2 logarithm of the block size under test.*/
-static void fill_img(int _log_blk_sz){
-  int i;
-  int j;
-  for(j=0;j<7;j++){
-    for(i=0;i<7;i++){
-      int c;
-      int x;
-      int y;
-      c=mask[j&3][i&3];
-      c=c<<4|15;
-      for(y=j<<_log_blk_sz;y<(j+1)<<_log_blk_sz;y++){
-        for(x=i<<_log_blk_sz;x<(i+1)<<_log_blk_sz;x++){
-          img[y][x]=c;
-        }
-      }
-    }
-  }
-  for(j=0;j<6<<_log_blk_sz;j++){
-    for(i=0;i<6<<_log_blk_sz;i++){
-      printf("%2X%c",img[j][i],i+1<6<<_log_blk_sz?' ':'\n');
-    }
-  }
-}
-/*Test driver for od_mc_predict8() (only built if the enclosing #if 0 is
-   enabled).
-  For each block size from 4 to 16 (log_blk_sz 2 through 4) it rebuilds the
-   test image and MV grids, then for each of the 4x4 test blocks:
-   - Predicts the full-size block using edge_types and prints it.
-   - Predicts each of the four half-size quadrants (c=0..3) using
-      edge_types2, with the MV at corner c+2 replaced from mvs2 and the
-      neighboring corner MVs averaged where the edge flags call for it.
-   - Compares selected boundary rows and columns of each quadrant against
-      the full-size block, or against quadrants predicted earlier, and
-      prints the quadrant with mismatching samples prefixed by '!'.
-  Return: Always 0.*/
-int main(void){
-  int log_blk_sz;
-  for(log_blk_sz=2;log_blk_sz<=4;log_blk_sz++){
-    int blk_sz;
-    int i;
-    int j;
-    blk_sz=1<<log_blk_sz;
-    fill_img(log_blk_sz);
-    fill_mvs(log_blk_sz);
-    for(j=0;j<4;j++){
-      for(i=0;i<4;i++){
-        ogg_int32_t   mvx[4];
-        ogg_int32_t   mvy[4];
-        unsigned char dst[17][17];
-        unsigned char dst2[4][9][9];
-        unsigned      mismatch[4][9][9];
-        int           etype;
-        int           x;
-        int           y;
-        int           c;
-        mvx[0]=mvs[j][i][0];
-        mvy[0]=mvs[j][i][1];
-        mvx[1]=mvs[j][i+1&3][0];
-        mvy[1]=mvs[j][i+1&3][1];
-        mvx[2]=mvs[j+1&3][i+1&3][0];
-        mvy[2]=mvs[j+1&3][i+1&3][1];
-        mvx[3]=mvs[j+1&3][i][0];
-        mvy[3]=mvs[j+1&3][i][1];
-        etype=edge_types[j][i];
-        printf("Block (%i,%i): size %i, interpolation type: %c%c%c%c (0x%X)\n",
-         i,j,1<<log_blk_sz,
-         etype&1?'V':'B',etype&2?'V':'B',
-         etype&4?'V':'B',etype&8?'V':'B',etype);
-        printf("<%8.4lf,%8.4lf> <%8.4lf,%8.4lf>\n",
-         mvx[0]/(double)0x40000,mvy[0]/(double)0x40000,
-         mvx[1]/(double)0x40000,mvy[1]/(double)0x40000);
-        printf("<%8.4lf,%8.4lf> <%8.4lf,%8.4lf>\n",
-         mvx[3]/(double)0x40000,mvy[3]/(double)0x40000,
-         mvx[2]/(double)0x40000,mvy[2]/(double)0x40000);
-        od_mc_predict8(dst[0],sizeof(dst[0]),
-         img[j+1<<log_blk_sz]+(i+1<<log_blk_sz),sizeof(img[0]),mvx,mvy,
-         etype,0,0,0,log_blk_sz,log_blk_sz);
-        for(y=0;y<blk_sz;y++){
-          for(x=0;x<blk_sz;x++){
-            printf("%2X%c",dst[y][x],x+1<blk_sz?' ':'\n');
-          }
-        }
-        printf("\n");
-        for(c=0;c<4;c++){
-          int s1;
-          int s3;
-          mvx[0]=mvs[j][i][0];
-          mvy[0]=mvs[j][i][1];
-          mvx[1]=mvs[j][i+1&3][0];
-          mvy[1]=mvs[j][i+1&3][1];
-          mvx[2]=mvs[j+1&3][i+1&3][0];
-          mvy[2]=mvs[j+1&3][i+1&3][1];
-          mvx[3]=mvs[j+1&3][i][0];
-          mvy[3]=mvs[j+1&3][i][1];
-          mvx[c+2&3]=mvs2[j][i][0];
-          mvy[c+2&3]=mvs2[j][i][1];
-          etype=edge_types2[j<<1|(c>>1)][i<<1|((c+1&3)>>1)];
-          if(1||!(c&1)){/*Always taken, so s1 is always 0 here.*/
-            etype&=~(1<<(c+1&3));
-            if(c==3)etype|=(etype&8)>>3;
-            else etype|=(etype&1<<c)<<1;
-            s1=0;
-          }
-          else s1=1;
-          if(s1||(etype>>c&1)){
-            mvx[c+1&3]=mvx[c]+mvx[c+1&3]>>1;
-            mvy[c+1&3]=mvy[c]+mvy[c+1&3]>>1;
-          }
-          if(1||(c&1)){/*Always taken, so s3 is always 0 here.*/
-            etype&=~(1<<(c+2&3));
-            if(c==1)etype|=(etype&1)<<3;
-            else etype|=(etype&1<<(c+3&3))>>1;
-            s3=0;
-          }
-          else s3=1;
-          if(s3||(etype<<(-c&3)&8)){
-            mvx[c+3&3]=mvx[c]+mvx[c+3&3]>>1;
-            mvy[c+3&3]=mvy[c]+mvy[c+3&3]>>1;
-          }
-          printf("Block (%i.%i,%i.%i): size %i, "
-           "interpolation type: %c%c%c%c (0x%X)\n",
-           i,((c+1&3)>>1)*5,j,(c>>1)*5,1<<log_blk_sz-1,
-           etype&1?'V':'B',etype&2?'V':'B',
-           etype&4?'V':'B',etype&8?'V':'B',etype);
-          printf("<%9.5lf,%9.5lf> <%9.5lf,%9.5lf>\n",
-           mvx[0]/(double)0x40000,mvy[0]/(double)0x40000,
-           mvx[1]/(double)0x40000,mvy[1]/(double)0x40000);
-          printf("<%9.5lf,%9.5lf> <%9.5lf,%9.5lf>\n",
-           mvx[3]/(double)0x40000,mvy[3]/(double)0x40000,
-           mvx[2]/(double)0x40000,mvy[2]/(double)0x40000);
-          od_mc_predict8(dst2[c][0],sizeof(dst2[c][0]),
-           img[(j+1<<1|(c>>1))<<log_blk_sz-1]+
-           ((i+1<<1|((c+1&3)>>1))<<log_blk_sz-1),
-           sizeof(img[0]),mvx,mvy,etype,c,s1,s3,log_blk_sz-1,log_blk_sz-1);
-          memset(mismatch[c][0],0,sizeof(mismatch[c]));
-          switch(c){
-            case 0:{
-              for(x=0;x<blk_sz>>1;x++){
-                if(dst2[c][0][x]!=dst[0][x])mismatch[c][0][x]++;
-              }
-              for(y=1;y<blk_sz>>1;y++){
-                if(dst2[c][y][0]!=dst[y][0])mismatch[c][y][0]++;
-              }
-            }break;
-            case 1:{
-              for(x=0;x<blk_sz>>1;x++){
-                if(dst2[c][0][x]!=dst[0][x+(blk_sz>>1)])mismatch[c][0][x]++;
-              }
-              for(y=1;y<blk_sz>>1;y++){
-                if(dst2[c][y][blk_sz>>1]!=dst[y][blk_sz]){
-                  mismatch[c][y][blk_sz>>1]++;
-                }
-              }
-              for(y=0;y<blk_sz>>1;y++){
-                if(dst2[c][y][0]!=dst2[0][y][blk_sz>>1])mismatch[c][y][0]++;
-              }
-            }break;
-            case 2:{
-              for(x=0;x<blk_sz>>1;x++){
-                if(dst2[c][0][x]!=dst2[1][blk_sz>>1][x])mismatch[c][0][x]++;
-              }
-              for(y=0;y<blk_sz>>1;y++){
-                if(dst2[c][y][blk_sz>>1]!=dst[y+(blk_sz>>1)][blk_sz]){
-                  mismatch[c][y][blk_sz>>1]++;
-                }
-              }
-              for(x=0;x<blk_sz>>1;x++){
-                if(dst2[c][blk_sz>>1][x]!=dst[blk_sz][x+(blk_sz>>1)]){
-                  mismatch[c][blk_sz>>1][x]++;
-                }
-              }
-            }break;
-            case 3:{
-              for(x=0;x<blk_sz>>1;x++){
-                if(dst2[c][0][x]!=dst2[0][blk_sz>>1][x])mismatch[c][0][x]++;
-              }
-              for(y=1;y<blk_sz>>1;y++){
-                if(dst2[c][y][blk_sz>>1]!=dst2[2][y][0]){
-                  mismatch[c][y][blk_sz>>1]++;
-                }
-              }
-              for(x=0;x<blk_sz>>1;x++){
-                if(dst2[c][blk_sz>>1][x]!=dst[blk_sz][x]){
-                  mismatch[c][blk_sz>>1][x]++;
-                }
-              }
-              for(y=0;y<blk_sz>>1;y++){
-                if(dst2[c][y][0]!=dst[y+(blk_sz>>1)][0])mismatch[c][y][0]++;
-              }
-            }break;
-          }
-          for(y=0;y<blk_sz>>1;y++){
-            for(x=0;x<blk_sz>>1;x++){
-              printf("%c%2X",mismatch[c][y][x]?'!':' ',dst2[c][y][x]);
-            }
-            printf("\n");
-          }
-          printf("\n");
-        }
-      }
-    }
-  }
-  return 0;
-}
-
-#endif
diff --git a/src/mcenc-level.c b/src/mcenc-level.c
deleted file mode 100644 (file)
index 63d04d0..0000000
+++ /dev/null
@@ -1,4448 +0,0 @@
-/*Daala video codec
-Copyright (c) 2006-2010 Daala project contributors.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
-
-#include <stddef.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <limits.h>
-#include <string.h>
-
-/*TODO:
- - Develop a real encoding and measure real bits.
- - Thresholds for DP.
-   + How do we calculate them?
-   + How do they propagate between frames (block sizes change)
-   + Compute rate change of trailing MVs correctly.
- - Compute bits needed for labels during DP (to bias towards using the same
-    label).
- - Allow setting a maximum decimation level.
- - Allow setting a maximum refinement level.*/
-
-/*The frame number to animate.*/
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-#define ANI_FRAME (69)
-#endif
-
-#define OD_MC_USEB (1<<0)
-#define OD_MC_USEV (1<<1)
-
-typedef struct od_mv_node            od_mv_node;
-typedef struct od_mv_dp_state        od_mv_dp_state;
-typedef struct od_mv_dp_node         od_mv_dp_node;
-typedef struct od_mv_err_node        od_mv_err_node;
-
-#include "mc.h"
-#include "encint.h"
-
-typedef int          od_offset[2];
-typedef int          od_pattern[8];
-typedef ogg_uint16_t od_sad4[4];
-
-
-
-/*The state information used by the motion estimation process that is not
-   required by the decoder.
-  Some of this information corresponds to a vertex in the MV mesh.
-  Other pieces correspond to a block whose upper-left corner is located at that
-   vertex.*/
-struct od_mv_node{
-  /*The historical motion vectors for EPZS^2, stored at full-pel resolution.
-    Indexed by [time][reference_type][component].*/
-  int           mvs[3][2][2];
-  /*The current estimated rate of this MV.*/
-  unsigned      mv_rate:16;
-  /*The current estimated rate of the edge labels.*/
-  unsigned      lb_rate:4;
-  /*The number of blocks influenced by this MV who failed their SAD checks.*/
-  unsigned      needs_check:4;
-  /*The current size of the block with this MV at its upper-left.*/
-  unsigned      log_mvb_sz:2;
-  /*The index of the exterior corner of that block.*/
-  unsigned      c:2;
-  /*The edge splitting index of that block.*/
-  unsigned      s:2;
-  /*The current distortion of that block.*/
-  ogg_int32_t   sad;
-  /*The SAD for BMA predictor centered on this node.
-    Used for the dynamic thresholds of the initial EPZS^2 pass.*/
-  ogg_int32_t   bma_sad;
-  /*The location of this node in the grid.
-    Used to retrieve it after pulling it off the decimation heap.*/
-  int           vx;
-  int           vy;
-  /*The change in global distortion for decimating this node.*/
-  ogg_int32_t   dd;
-  /*The change in global rate for decimating this node.*/
-  int           dr;
-  /*The position of this node in the heap.*/
-  int           heapi;
-};
-
-#define OD_DP_NSTATES_MAX     (9)
-#define OD_DP_NBLOCKS_MAX     (8)
-#define OD_DP_NPREDICTED_MAX  (17)
-#define OD_DP_NCHANGEABLE_MAX (6)
-
-/*One of the trellis states in the dynamic program.*/
-struct od_mv_dp_state{
-  /*The MV to install for this state.*/
-  int           mv[2];
-  /*The best state in the previous DP node to use with this one, or -1 to
-     indicate the start of the path.*/
-  int           prevsi;
-  /*The total rate change (thus far) produced by choosing this path.*/
-  int           dr;
-  /*The total distortion change (thus far) produced by choosing this path.*/
-  ogg_int32_t   dd;
-  /*The new SAD of each block affected by the DP between this node and the
-     previous node.
-    These are installed if the path is selected.*/
-  ogg_int32_t   block_sads[OD_DP_NBLOCKS_MAX];
-  /*The new rate of each MV predicted by this node.
-    These are installed if the path is selected.
-    These may supersede the rates reported in previous nodes on the path.*/
-  int           pred_mv_rates[OD_DP_NPREDICTED_MAX];
-  /*The new rate of this MV.*/
-  int           mv_rate;
-};
-
-/*A node on the dynamic programming path.*/
-struct od_mv_dp_node{
-  od_mv_grid_pt  *mvg;
-  od_mv_node     *mv;
-  /*The number of states considered in this node.*/
-  int             nstates;
-  /*The number of blocks affected by states in this node.*/
-  int             nblocks;
-  /*The number of MVs predicted by this node.*/
-  int             npredicted;
-  /*The number of those MVs that are potentially changeable by future DP
-     states.*/
-  int             npred_changeable;
-  /*The original MV used by this node.*/
-  int             original_mv[2];
-  /*The original edge label used by this node.*/
-  unsigned char   original_etype:1;
-  /*The original rate of this MV.*/
-  int             original_mv_rate;
-  /*The original MV rates before predictors were changed by this node.
-    This only includes the ones that are actually changeable.*/
-  int             original_mv_rates[OD_DP_NCHANGEABLE_MAX];
-  /*The last node we save/restore in order to perform prediction.*/
-  od_mv_dp_node  *min_predictor_node;
-  /*The set of trellis states.*/
-  od_mv_dp_state  states[OD_DP_NSTATES_MAX];
-  /*Up to 8 blocks can be influenced by this MV and the previous MV.*/
-  od_mv_node     *blocks[OD_DP_NBLOCKS_MAX];
-  /*The vertices whose MV we predict.*/
-  /*Up to 20 MVs can be predicted by this one, but 3 of those are MVs on the
-     DP trellis whose value we have yet to determine.*/
-  od_mv_grid_pt  *predicted_mvgs[OD_DP_NPREDICTED_MAX];
-  od_mv_node     *predicted_mvs[OD_DP_NPREDICTED_MAX];
-};
-/*The working state passed to the od_mv_est_*() routines in this file.
-  It is set up by od_mv_est_init() and released by od_mv_est_clear().*/
-struct od_mv_est_ctx{
-  od_enc_ctx      *enc;
-  /*A cache of the SAD values used during decimation.
-    Indexed by [vy>>log_mvb_sz][vx>>log_mvb_sz][log_mvb_sz][s], where s is the
-     edge split state.
-    The SAD of top-level blocks (log_mvb_sz==2) is not stored in this cache,
-     since it is only needed once.*/
-  od_sad4        **sad_cache[2];
-  /*The state of the MV mesh specific to the encoder.*/
-  od_mv_node     **mvs;
-  /*A temporary copy of the decoder-side MV grid used to save-and-restore the
-     MVs when attempting sub-pel refinement.*/
-  od_mv_grid_pt  **refine_grid;
-  /*Space for storing the Viterbi trellis used for DP refinement.*/
-  od_mv_dp_node   *dp_nodes;
-  /*The decimation heap.*/
-  od_mv_node     **dec_heap;
-  /*The number of vertices in the decimation heap.*/
-  int              dec_nheap;
-  /*The number of undecimated vertices in each row.*/
-  unsigned        *row_counts;
-  /*The number of undecimated vertices in each column.*/
-  unsigned        *col_counts;
-  /*The weights used to produce the accelerated MV predictor.*/
-  ogg_int32_t      mvapw[2][2];
-  /*Flags indicating which MVs have already been tested during the initial
-    EPZS^2 pass.
-    Indexed by [mvy+32][mvx+32]; an entry counts as tested when it equals
-     hit_bit.*/
-  unsigned char    hit_cache[64][64];
-  /*The flag used by the current EPZS search iteration.*/
-  unsigned         hit_bit;
-  /*The Lagrangian multiplier used for R-D optimization.*/
-  int              lambda;
-  /*Configuration.*/
-  /*The flags indicating which feature to use.*/
-  int              flags;
-  /*The smallest resolution to refine MVs to.*/
-  int              mv_res_min;
-  /*The deepest level to refine to.*/
-  int              level_max;
-};
-
-
-
-/*The subdivision level of a MV in the mesh, given its position (mod 4).*/
-static const int OD_MC_LEVEL[4][4]={
-  {0,4,2,4},
-  {4,3,4,3},
-  {2,4,1,4},
-  {4,3,4,3}
-};
-
-/*Ancestor lists for a vertex.
-  These are stored as lists of offsets to the vertices in the domain.
-  Level 0 ancestors are not included, as they cannot be decimated.*/
-/*Lists for level 2 vertices.*/
-static const od_offset OD_ANCESTORS2[2][2]={
-  {{ 0,-2},{ 0, 2}},
-  {{-2, 0},{ 2, 0}},
-};
-/*Lists for level 3 vertices.*/
-static const od_offset OD_ANCESTORS3[4][5]={
-  {{ 1,-1},{-1, 1},{ 1,-3},{-3, 1},{ 1, 1}},
-  {{-1,-1},{ 1, 1},{-1,-3},{-1, 1},{ 3, 1}},
-  {{-1,-1},{ 1, 1},{-3,-1},{ 1,-1},{ 1, 3}},
-  {{ 1,-1},{-1, 1},{-1,-1},{ 3,-1},{-1, 3}},
-};
-/*Lists for level 4 vertices.*/
-static const od_offset OD_ANCESTORS4[8][9]={
-  {{ 0,-1},{ 0, 1},{-1,-2},{ 1, 0},{-1, 2},{-3,-2},{ 1,-2},{-3, 2},{ 1, 2}},
-  {{ 0,-1},{ 0, 1},{ 1,-2},{-1, 0},{ 1, 2},{-1,-2},{ 3,-2},{-1, 2},{ 3, 2}},
-  {{-1, 0},{ 1, 0},{-2,-1},{ 2,-1},{ 0, 1},{-2,-3},{ 2,-3},{-2, 1},{ 2, 1}},
-  {{-1, 0},{ 1, 0},{ 0,-1},{-2, 1},{ 2, 1},{ 0,-3},{-4, 1},{ 0, 1},{ 4, 1}},
-  {{ 0,-1},{ 0, 1},{ 1,-2},{-1, 0},{ 1, 2},{ 1,-4},{-3, 0},{ 1, 0},{ 1, 4}},
-  {{ 0,-1},{ 0, 1},{-1,-2},{ 1, 0},{-1, 2},{-1,-4},{-1, 0},{ 3, 0},{-1, 4}},
-  {{-1, 0},{ 1, 0},{ 0,-1},{-2, 1},{ 2, 1},{-2,-1},{ 2,-1},{-2, 3},{ 2, 3}},
-  {{-1, 0},{ 1, 0},{-2,-1},{ 2,-1},{ 0, 1},{-4,-1},{ 0,-1},{ 4,-1},{ 0, 3}},
-};
-/*The number of ancestors in each list in the grid pattern.*/
-static const int OD_NANCESTORS[4][4]={
-  {0,9,2,9},
-  {9,5,9,5},
-  {2,9,0,9},
-  {9,5,9,5}
-};
-/*The lists for each vertex in the grid pattern.*/
-static const od_offset *OD_ANCESTORS[4][4]={
-  {NULL,            OD_ANCESTORS4[0],OD_ANCESTORS2[0],OD_ANCESTORS4[1]},
-  {OD_ANCESTORS4[2],OD_ANCESTORS3[0],OD_ANCESTORS4[3],OD_ANCESTORS3[1]},
-  {OD_ANCESTORS2[1],OD_ANCESTORS4[4],NULL            ,OD_ANCESTORS4[5]},
-  {OD_ANCESTORS4[6],OD_ANCESTORS3[2],OD_ANCESTORS4[7],OD_ANCESTORS3[3]}
-};
-
-
-
-/*Computes the SAD of the input image against the given predictor.
-  The block is clipped to the input frame: rows and columns that fall to the
-   left of, above, to the right of, or below the frame are skipped, and the
-   predictor pointer is advanced to match.
-  Only plane 0 of the input image is used.
-  _state:      The state whose input image is compared against.
-  _p:          The predictor sample for the upper-left corner of the
-                (unclipped) block.
-  _pystride:   The offset between predictor rows, in samples.
-  _pxstride:   The offset between predictor columns, in samples.
-  _x:          The horizontal position of the block in the input image; may be
-                negative.
-  _y:          The vertical position of the block in the input image; may be
-                negative.
-  _log_blk_sz: The base-2 logarithm of the block width and height.
-  Return: The sum of absolute differences over the clipped block (0 if
-   nothing remains after clipping).*/
-static ogg_int32_t od_state_sad8(od_state *_state,const unsigned char *_p,
- int _pystride,int _pxstride,int _x,int _y,int _log_blk_sz){
-  od_img_plane        *iplane;
-  const unsigned char *p;
-  unsigned char       *src;
-  unsigned char       *src0;
-  int                  width;
-  int                  height;
-  int                  i;
-  int                  j;
-  ogg_int32_t          ret;
-  width=height=1<<_log_blk_sz;
-  /*TODO: Use picture dimensions, not frame dimensions.*/
-  if(_x<0){
-    width+=_x;
-    _p-=_x*_pxstride;
-    _x=0;
-  }
-  if(_y<0){
-    height+=_y;
-    _p-=_y*_pystride;
-    _y=0;
-  }
-  if(_x+width>_state->input.width)width=_state->input.width-_x;
-  if(_y+height>_state->input.height)height=_state->input.height-_y;
-  /*fprintf(stderr,"[%i,%i]x[%i,%i]\n",_x,_y,width,height);*/
-  iplane=_state->input.planes+0;
-  src0=iplane->data+_y*iplane->ystride+_x*iplane->xstride;
-  ret=0;
-  for(j=0;j<height;j++){
-    src=src0;
-    p=_p;
-    for(i=0;i<width;i++){
-      ret+=abs(p[0]-src[0]);
-      src+=iplane->xstride;
-      p+=_pxstride;
-    }
-    src0+=iplane->ystride;
-    _p+=_pystride;
-  }
-  return ret;
-}
-
-
-/*Initializes a motion estimation context for the given encoder.
-  This allocates the SAD caches, the encoder-side MV node grid, the
-   refinement copy of the MV grid, the DP nodes, the row and column counters,
-   and the decimation heap, all sized from the macro block counts in the
-   encoder state (nhmbs and nvmbs).
-  Every MV node gets its grid position recorded and its heap index set to -1,
-   and every point of the encoder state's MV grid is marked valid.
-  This function does not itself check the allocation results.
-  Release the context with od_mv_est_clear().
-  _est: The context to initialize.
-  _enc: The encoder the context will operate on.*/
-static void od_mv_est_init(od_mv_est_ctx *_est,od_enc_ctx *_enc){
-  int nhmvbs;
-  int nvmvbs;
-  int vx;
-  int vy;
-  _est->enc=_enc;
-  nhmvbs=_enc->state.nhmbs+1<<2;
-  nvmvbs=_enc->state.nvmbs+1<<2;
-  _est->sad_cache[1]=(od_sad4 **)od_malloc_2d(nvmvbs>>1,nhmvbs>>1,
-   sizeof(_est->sad_cache[1][0][0]));
-  _est->sad_cache[0]=(od_sad4 **)od_malloc_2d(nvmvbs,nhmvbs,
-   sizeof(_est->sad_cache[1][0][0]));
-  _est->mvs=(od_mv_node **)od_calloc_2d(nvmvbs+1,nhmvbs+1,
-   sizeof(_est->mvs[0][0]));
-  _est->refine_grid=(od_mv_grid_pt **)od_malloc_2d(nvmvbs+1,nhmvbs+1,
-   sizeof(_est->refine_grid[0][0]));
-  _est->dp_nodes=(od_mv_dp_node *)_ogg_malloc(
-   sizeof(od_mv_dp_node)*(OD_MAXI(nhmvbs,nvmvbs)+1));
-  _est->row_counts=(unsigned *)_ogg_malloc(sizeof(unsigned)*(nvmvbs+1));
-  _est->col_counts=(unsigned *)_ogg_malloc(sizeof(unsigned)*(nhmvbs+1));
-  for(vy=0;vy<=nvmvbs;vy++)for(vx=0;vx<=nhmvbs;vx++){
-    _est->mvs[vy][vx].vx=vx;
-    _est->mvs[vy][vx].vy=vy;
-    _est->mvs[vy][vx].heapi=-1;
-    _enc->state.mv_grid[vy][vx].valid=1;
-  }
-  _est->dec_heap=(od_mv_node **)_ogg_malloc(
-   (nvmvbs+1)*(nhmvbs+1)*sizeof(_est->dec_heap[0]));
-  _est->hit_bit=0;
-  /*TODO: Allow configuration.*/
-  _est->mv_res_min=0;
-  _est->flags=OD_MC_USEB|OD_MC_USEV;
-  _est->level_max=4;
-}
-/*Frees the buffers allocated by od_mv_est_init().
-  The context structure itself is not freed.
-  _est: The context to clear.*/
-static void od_mv_est_clear(od_mv_est_ctx *_est){
-  _ogg_free(_est->dec_heap);
-  _ogg_free(_est->col_counts);
-  _ogg_free(_est->row_counts);
-  _ogg_free(_est->dp_nodes);
-  od_free_2d(_est->refine_grid);
-  od_free_2d(_est->mvs);
-  od_free_2d(_est->sad_cache[0]);
-  od_free_2d(_est->sad_cache[1]);
-}
-
-
-
-/*STAGE 1: INITIAL MV ESTIMATES (via EPZS^2).*/
-
-
-
-/*The maximum luma plane SAD value for accepting set A predictors.*/
-static const int OD_YSAD_THRESH1[3]={16,64,256};
-/*The amount to right shift the minimum error by when inflating it for
-   computing the second maximum luma plane SAD threshold.*/
-#define OD_YSAD_THRESH2_SCALE_BITS (3)
-/*The amount to add to the second maximum luma plane threshold when inflating
-   it.*/
-static const int OD_YSAD_THRESH2_OFFS[3]={8,32,128};
-
-/*The vector offsets in the X direction for each search site in the square
-   pattern.*/
-static const int OD_SQUARE_DX[9]={-1,0,1,-1,0,1,-1,0,1};
-/*The vector offsets in the Y direction for each search site in the square
-   pattern.*/
-static const int OD_SQUARE_DY[9]={-1,-1,-1,0,0,0,1,1,1};
-
-/*The number of sites to search for each boundary condition in the square
-   pattern.
-  Bit flags for the boundary conditions are as follows:
-  1: -32==dx
-  2:      dx==31
-  4: -32==dy
-  8:      dy==31*/
-static const int OD_SQUARE_NSITES[11]={8,5,5,0,5,3,3,0,5,3,3};
-/*The list of sites to search for each boundary condition in the square
-   pattern.*/
-static const od_pattern OD_SQUARE_SITES[11]={
-  /* -32<dx<31,   -32<dy<31*/
-  {0,1,2,3,5,6,7,8},
-  /*-32==dx,      -32<dy<31*/
-  {1,2,5,7,8},
-  /*     dx==31,  -32<dy<31*/
-  {0,1,3,6,7},
-  /*-32==dx==31,  -32<dy<31*/
-  {-1},
-  /* -32<dx<31,  -32==dy*/
-  {3,5,6,7,8},
-  /*-32==dx,     -32==dy*/
-  {5,7,8},
-  /*     dx==31, -32==dy*/
-  {3,6,7},
-  /*-32==dx==31, -32==dy*/
-  {-1},
-  /* -32<dx<31,       dy==31*/
-  {0,1,2,3,5},
-  /*-32==dx,          dy==31*/
-  {1,2,5},
-  /*     dx==31,      dy==31*/
-  {0,1,3}
-};
-
-/*The number of sites to search for each boundary condition in the diamond
-   pattern.
-  Bit flags for the boundary conditions are as follows:
-  1: -32==dx
-  2:      dx==31
-  4: -32==dy
-  8:      dy==31*/
-static const int OD_DIAMOND_NSITES[11]={4,3,3,0,3,2,2,0,3,2,2};
-/*The list of sites to search for each boundary condition in the diamond
-   pattern.*/
-static const od_pattern OD_DIAMOND_SITES[11]={
-  /* -32<dx<31,   -32<dy<31*/
-  {1,3,5,7},
-  /*-32==dx,      -32<dy<31*/
-  {1,5,7},
-  /*     dx==31,  -32<dy<31*/
-  {1,3,7},
-  /*-32==dx==31,  -32<dy<31*/
-  {-1},
-  /* -32<dx<31,  -32==dy*/
-  {3,5,7},
-  /*-32==dx,     -32==dy*/
-  {5,7},
-  /*     dx==31, -32==dy*/
-  {3,7},
-  /*-32==dx==31, -32==dy*/
-  {-1},
-  /* -32<dx<31,       dy==31*/
-  {1,3,5},
-  /*-32==dx,          dy==31*/
-  {1,5},
-  /*     dx==31,      dy==31*/
-  {1,3}
-};
-
-
-
-/*Clear the cache of motion vectors we've examined.
-  Rather than wiping the cache on every call, the tag stored by
-   od_mv_est_set_hit() is advanced, which invalidates every older entry.
-  The cache is only actually zeroed on the first call and whenever the tag
-   would no longer fit in an unsigned char.
-  The tag is never left at 0 after this call: 0 is the value of a freshly
-   zeroed entry, so using it as the live tag would make every unvisited MV
-   look like it had already been examined.
-  _est: The motion estimation context whose hit cache is reset.*/
-static void od_mv_est_clear_hit_cache(od_mv_est_ctx *_est){
-  if(_est->hit_bit==0||_est->hit_bit>=UCHAR_MAX){
-    memset(_est->hit_cache,0,sizeof(_est->hit_cache));
-    _est->hit_bit=1;
-  }
-  else _est->hit_bit++;
-}
-
-/*Test if a motion vector has been examined.
-  _est: The motion estimation context.
-  _mvx: The horizontal MV component.
-  _mvy: The vertical MV component.
-        Both components are offset by 32 to index the 64x64 cache, so they
-         must lie in the range -32...31; this is not checked here.
-  Return: Non-zero if the cache entry for this MV carries the current hit
-   flag, 0 otherwise.*/
-static int od_mv_est_is_hit(od_mv_est_ctx *_est,int _mvx,int _mvy){
-  return _est->hit_cache[_mvy+32][_mvx+32]==_est->hit_bit;
-}
-
-/*Mark a motion vector examined.
-  The cache entry for the MV is stamped with the current hit flag.
-  _est: The motion estimation context.
-  _mvx: The horizontal MV component.
-  _mvy: The vertical MV component.
-        Both components are offset by 32 to index the 64x64 cache, so they
-         must lie in the range -32...31; this is not checked here.*/
-static void od_mv_est_set_hit(od_mv_est_ctx *_est,int _mvx,int _mvy){
-  _est->hit_cache[_mvy+32][_mvx+32]=(unsigned char)_est->hit_bit;
-}
-
-/*Gets the predictor for a given MV node at the given MV resolution.
-  Vertices within 2 of the edge of the MV grid get a zero predictor.
-  Otherwise up to four neighboring MVs are gathered from the state's MV grid,
-   with the neighborhood chosen by the level of the vertex:
-   - Level 0: the vertices 4 away to the upper-left, above, upper-right, and
-      left.
-   - Odd levels: the four diagonal neighbors mvb_sz away.
-   - Other levels: the neighbors mvb_sz away above, to the left, to the right,
-      and below.
-  Each component of the predictor is then the median of the corresponding
-   candidate components (the two middle values are averaged when there are
-   four candidates), scaled down by OD_DIV_POW2_RE().
-  The partial sorts below assume OD_SORT2I(a,b) leaves its arguments ordered
-   so that a<=b (the macro is defined elsewhere).
-  _state:  The state containing the MV grid.
-  _pred:   Returns the predictor, as an (x,y) pair.
-  _vx:     The horizontal position of the vertex in the MV grid.
-  _vy:     The vertical position of the vertex in the MV grid.
-  _level:  The subdivision level of the vertex.
-  _mv_res: The MV resolution; used as the shift passed to OD_DIV_POW2_RE().*/
-static void od_state_get_predictor(od_state *_state,int _pred[2],
- int _vx,int _vy,int _level,int _mv_res){
-  int nhmvbs;
-  int nvmvbs;
-  nhmvbs=_state->nhmbs+1<<2;
-  nvmvbs=_state->nvmbs+1<<2;
-  if(_vx<2||_vy<2||_vx>nhmvbs-2||_vy>nvmvbs-2)_pred[0]=_pred[1]=0;
-  else{
-    od_mv_grid_pt *cneighbors[4];
-    int            a[4][2];
-    int            mvb_sz;
-    int            ncns;
-    int            ci;
-    mvb_sz=1<<(4-_level>>1);
-    ncns=4;
-    if(_level==0){
-      cneighbors[0]=_state->mv_grid[_vy-4]+_vx-4;
-      cneighbors[1]=_state->mv_grid[_vy-4]+_vx;
-      cneighbors[2]=_state->mv_grid[_vy-4]+_vx+4;
-      cneighbors[3]=_state->mv_grid[_vy]+_vx-4;
-    }
-    else{
-      if(_level&1){
-        cneighbors[0]=_state->mv_grid[_vy-mvb_sz]+_vx-mvb_sz;
-        cneighbors[1]=_state->mv_grid[_vy-mvb_sz]+_vx+mvb_sz;
-        cneighbors[2]=_state->mv_grid[_vy+mvb_sz]+_vx-mvb_sz;
-        cneighbors[3]=_state->mv_grid[_vy+mvb_sz]+_vx+mvb_sz;
-      }
-      else{
-        cneighbors[0]=_state->mv_grid[_vy-mvb_sz]+_vx;
-        cneighbors[1]=_state->mv_grid[_vy]+_vx-mvb_sz;
-        /*NOTE: Only one of these candidates can be excluded at a time, so
-           there will always be at least 3.
-          NOTE(review): '>' binds more tightly than '&', so each test below
-           parses as ((...>...)&~3), which is always 0.
-          As written, ncns is never decremented and the median-of-3 path
-           further down is never taken; confirm the intended grouping.*/
-        if(_vx+mvb_sz>_vx+4&~3)ncns--;
-        else cneighbors[2]=_state->mv_grid[_vy]+_vx+mvb_sz;
-        if(_vy+mvb_sz>_vy+4&~3)ncns--;
-        else cneighbors[ncns-1]=_state->mv_grid[_vy+mvb_sz]+_vx;
-      }
-    }
-    for(ci=0;ci<ncns;ci++){
-      a[ci][0]=cneighbors[ci]->mv[0];
-      a[ci][1]=cneighbors[ci]->mv[1];
-    }
-    /*Median-of-4.*/
-    if(ncns>3){
-      /*fprintf(stderr,"Median of 4:\n");
-      fprintf(stderr,"(%i,%i) (%i,%i) (%i,%i) (%i,%i)\n",
-       a[0][0],a[0][1],a[1][0],a[1][1],a[2][0],a[2][1],a[3][0],a[3][1]);*/
-/*
-Sorting network for 4 elements:
-0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
-0001 0010 0011 0100 0101 0110 0111 1001 1010 1011 1101
-0:1
-0010 0010 0011 0100 0110 0110 0111 1010 1010 1011 1110
-0010 0011 0100 0110 0111 1010 1011
-2:3
-0010 0011 1000 1010 1011 1100 1110
-0010 0011 1010 1011
-0:2
-0010 0110 1010 1110
-0010 0110 1010
-1:3
-1000 1100 1010
-1010
-This last compare is unneeded for a median:
-1:2
-1100
-*/
-      OD_SORT2I(a[0][0],a[1][0]);
-      OD_SORT2I(a[0][1],a[1][1]);
-      OD_SORT2I(a[2][0],a[3][0]);
-      OD_SORT2I(a[2][1],a[3][1]);
-      OD_SORT2I(a[0][0],a[2][0]);
-      OD_SORT2I(a[0][1],a[2][1]);
-      OD_SORT2I(a[1][0],a[3][0]);
-      OD_SORT2I(a[1][1],a[3][1]);
-      /*fprintf(stderr,"(%i,%i) (%i,%i) (%i,%i) (%i,%i)\n",
-       a[0][0],a[0][1],a[1][0],a[1][1],a[2][0],a[2][1],a[3][0],a[3][1]);*/
-      _pred[0]=OD_DIV_POW2_RE(a[1][0]+a[2][0],_mv_res+1);
-      _pred[1]=OD_DIV_POW2_RE(a[1][1]+a[2][1],_mv_res+1);
-    }
-    /*Median-of-3.*/
-    else{
-      /*fprintf(stderr,"Median of 3:\n");
-      fprintf(stderr,"(%i,%i) (%i,%i) (%i,%i)\n",
-       a[0][0],a[0][1],a[1][0],a[1][1],a[2][0],a[2][1]);*/
-      OD_SORT2I(a[0][0],a[1][0]);
-      OD_SORT2I(a[0][1],a[1][1]);
-      OD_SORT2I(a[1][0],a[2][0]);
-      OD_SORT2I(a[1][1],a[2][1]);
-      OD_SORT2I(a[0][0],a[1][0]);
-      OD_SORT2I(a[0][1],a[1][1]);
-      /*fprintf(stderr,"(%i,%i) (%i,%i) (%i,%i)\n",
-       a[0][0],a[0][1],a[1][0],a[1][1],a[2][0],a[2][1]);*/
-      _pred[0]=OD_DIV_POW2_RE(a[1][0],_mv_res);
-      _pred[1]=OD_DIV_POW2_RE(a[1][1],_mv_res);
-    }
-  }
-}
-
-/*Estimate the number of bits that will be used to encode the given MV.
-  The predictor must already have been subtracted off.
-  _dx: The horizontal MV residual.
-  _dy: The vertical MV residual.
-  Return: od_ilog() of the magnitude of each component, plus one more for
-   each component that is non-zero, plus 2.*/
-static int od_mv_est_bits(int _dx,int _dy){
-  return od_ilog(abs(_dx))+(_dx!=0)+od_ilog(abs(_dy))+(_dy!=0)+2;
-}
-
-/*Computes the SAD of a block with the given parameters.
-  The prediction is built into a local 16x16 buffer by
-   od_state_pred_block_from_setup() and then compared with the input image by
-   od_state_sad8() at the position ((_vx-2)<<2,(_vy-2)<<2).
-  _est:        The motion estimation context.
-  _ref:        The reference to predict from.
-  _vx:         The horizontal position of the block's vertex in the MV grid.
-  _vy:         The vertical position of the block's vertex in the MV grid.
-  _c:          The corner index passed through to the predictor.
-  _s:          The edge split state passed through to the predictor.
-  _log_mvb_sz: The log size of the block; passed both to the predictor and,
-                unchanged, as the log block size for the SAD.
-  Return: The SAD of the predicted block against the input.*/
-static ogg_int32_t od_mv_est_sad8(od_mv_est_ctx *_est,int _ref,
- int _vx,int _vy,int _c,int _s,int _log_mvb_sz){
-  od_state      *state;
-  unsigned char  pred[16][16];
-  state=&_est->enc->state;
-  od_state_pred_block_from_setup(state,pred[0],sizeof(pred[0]),_ref,0,
-   _vx,_vy,_c,_s,_log_mvb_sz);
-  return od_state_sad8(state,pred[0],sizeof(pred[0]),1,_vx-2<<2,_vy-2<<2,
-   _log_mvb_sz);
-}
-
-/*Checks to make sure the cached per-block state (log_mvb_sz, c, s, and sad)
-   is correct for the block at the given vertex.
-  If the block can be split and the grid point at its center is valid, the
-   four sub-blocks are checked recursively instead.
-  Otherwise the expected corner index, edge split state, and SAD are
-   recomputed and any value that differs from the cached one is reported on
-   stderr.
-  This is used for debugging only.
-  _est:        The motion estimation context.
-  _ref:        The reference used to recompute the SAD.
-  _vx:         The horizontal position of the block's upper-left vertex.
-  _vy:         The vertical position of the block's upper-left vertex.
-  _log_mvb_sz: The log size of the block, in MV grid units.*/
-void od_mv_est_check_rd_block_state(od_mv_est_ctx *_est,int _ref,
- int _vx,int _vy,int _log_mvb_sz){
-  od_state      *state;
-  int half_mvb_sz;
-  state=&_est->enc->state;
-  /*Only form the half size when the block can actually be split: the
-     recursion reaches _log_mvb_sz==0, and shifting by -1 is undefined.*/
-  half_mvb_sz=_log_mvb_sz>0?1<<_log_mvb_sz-1:0;
-  if(_log_mvb_sz>0&&state->mv_grid[_vy+half_mvb_sz][_vx+half_mvb_sz].valid){
-    od_mv_est_check_rd_block_state(_est,_ref,_vx,_vy,_log_mvb_sz-1);
-    od_mv_est_check_rd_block_state(_est,_ref,
-     _vx+half_mvb_sz,_vy,_log_mvb_sz-1);
-    od_mv_est_check_rd_block_state(_est,_ref,
-     _vx,_vy+half_mvb_sz,_log_mvb_sz-1);
-    od_mv_est_check_rd_block_state(_est,_ref,
-     _vx+half_mvb_sz,_vy+half_mvb_sz,_log_mvb_sz-1);
-  }
-  else{
-    od_mv_node  *block;
-    ogg_int32_t  sad;
-    int          c;
-    int          s;
-    block=_est->mvs[_vy]+_vx;
-    if(block->log_mvb_sz!=_log_mvb_sz){
-      fprintf(stderr,
-       "Failure at node (%i,%i): log_mvb_sz should be %i (is %i)\n",
-       _vx,_vy,_log_mvb_sz,block->log_mvb_sz);
-    }
-    if(_log_mvb_sz<2){
-      int mask;
-      /*The corner index is derived from the block's position within the
-         block twice its size.*/
-      mask=(1<<_log_mvb_sz+1)-1;
-      c=!!(_vx&mask);
-      if(_vy&mask)c=3-c;
-      if(block->c!=c){
-        fprintf(stderr,"Failure at node (%i,%i): c should be %i (is %i)\n",
-         _vx,_vy,c,block->c);
-      }
-      /*Bit 0 of s is the valid flag of the vertex at corner c+1, and bit 1
-         is the valid flag of the vertex at corner c+3.*/
-      s=state->mv_grid[_vy+(OD_VERT_DY[c+1&3]<<_log_mvb_sz)][
-       _vx+(OD_VERT_DX[c+1&3]<<_log_mvb_sz)].valid|
-       state->mv_grid[_vy+(OD_VERT_DY[c+3&3]<<_log_mvb_sz)][
-       _vx+(OD_VERT_DX[c+3&3]<<_log_mvb_sz)].valid<<1;
-    }
-    else{
-      c=0;
-      s=3;
-    }
-    if(block->s!=s){
-      fprintf(stderr,"Failure at node (%i,%i): s should be %i (is %i)\n",
-       _vx,_vy,s,block->s);
-    }
-    sad=od_mv_est_sad8(_est,_ref,_vx,_vy,c,s,_log_mvb_sz);
-    if(block->sad!=sad){
-      fprintf(stderr,"Failure at node (%i,%i): sad should be %i (is %i)\n",
-       _vx,_vy,sad,block->sad);
-    }
-  }
-}
-
-/*Checks to make sure our current mv_rate and sad values are correct.
-  Every top-level block (log size 2) is first checked with
-   od_mv_est_check_rd_block_state().
-  Then the rate of every valid MV is recomputed from its predictor and
-   compared against the cached mv_rate, with mismatches reported on stderr.
-  MVs closer than 2 vertices to the edge of the grid are expected to have a
-   rate of 0.
-  This is used for debugging only.
-  _est:    The motion estimation context.
-  _ref:    The reference used to recompute the block SADs.
-  _mv_res: The MV resolution the cached rates were computed at.*/
-void od_mv_est_check_rd_state(od_mv_est_ctx *_est,int _ref,int _mv_res){
-  od_state *state;
-  int       nhmvbs;
-  int       nvmvbs;
-  int       vx;
-  int       vy;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  for(vy=0;vy<nvmvbs;vy+=4){
-    for(vx=0;vx<nhmvbs;vx+=4){
-      od_mv_est_check_rd_block_state(_est,_ref,vx,vy,2);
-    }
-  }
-  for(vy=0;vy<nvmvbs;vy++)for(vx=0;vx<nhmvbs;vx++){
-    od_mv_grid_pt *mvg;
-    od_mv_node    *mv;
-    int            pred[2];
-    int            mv_rate;
-    mvg=state->mv_grid[vy]+vx;
-    if(!mvg->valid)continue;
-    mv=_est->mvs[vy]+vx;
-    if(vx>=2&&vx<=nhmvbs-2&&vy>=2&&vy<=nvmvbs-2){
-      od_state_get_predictor(state,pred,vx,vy,OD_MC_LEVEL[vy&3][vx&3],_mv_res);
-      mv_rate=od_mv_est_bits(
-       (mvg->mv[0]>>_mv_res)-pred[0],(mvg->mv[1]>>_mv_res)-pred[1]);
-    }
-    else pred[0]=pred[1]=mv_rate=0;
-    if(mv_rate!=mv->mv_rate){
-      fprintf(stderr,"Failure at node (%i,%i): mv_rate should be %i (is %i)\n",
-       vx,vy,mv_rate,mv->mv_rate);
-      fprintf(stderr,"Predictor was: (%i,%i)   MV was: (%i,%i)\n",
-       pred[0],pred[1],mvg->mv[0]>>_mv_res,mvg->mv[1]>>_mv_res);
-    }
-  }
-}
-
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-static const unsigned char OD_YCbCr_MVCAND[3]={210, 16,214};
-#endif
-
-static void od_mv_est_init_mv(od_mv_est_ctx *_est,int _ref,int _vx,int _vy){
-  od_state      *state;
-  od_img_plane  *iplane;
-  od_mv_grid_pt *mvg;
-  od_mv_node    *mv;
-  od_mv_node    *cneighbors[4];
-  od_mv_node    *pneighbors[4];
-  ogg_int32_t    t2;
-  ogg_int32_t    best_sad;
-  ogg_int32_t    best_cost;
-  int            best_rate;
-  int            cands[6][2];
-  int            best_vec[2];
-  int            a[4][2];
-  int            refi;
-  int            level;
-  int            log_mvb_sz;
-  int            mvb_sz;
-  int            bx;
-  int            by;
-  int            ncns;
-  int            mvxmin;
-  int            mvxmax;
-  int            mvymin;
-  int            mvymax;
-  int            candx;
-  int            candy;
-  int            predx;
-  int            predy;
-  int            ci;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-  int            x0;
-  int            y0;
-#endif
-  /*fprintf(stderr,"Initial search for MV (%i,%i):\n",_vx,_vy);*/
-  state=&_est->enc->state;
-  refi=state->ref_imgi[_ref];
-  iplane=state->ref_imgs[refi].planes+0;
-  mv=_est->mvs[_vy]+_vx;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1;
-  mvb_sz=1<<log_mvb_sz;
-  mvg=state->mv_grid[_vy]+_vx;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-  if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-    od_state_mc_predict(state,_ref);
-    od_state_fill_vis(state);
-    x0=(_vx-2<<3)+(OD_UMV_PADDING<<1);
-    y0=(_vy-2<<3)+(OD_UMV_PADDING<<1);
-  }
-#endif
-  /*fprintf(stderr,"Level %i (%ix%i block)\n",level,mvb_sz<<2,mvb_sz<<2);*/
-  bx=_vx-2<<2;
-  by=_vy-2<<2;
-  mvxmin=OD_MAXI(bx-(mvb_sz<<2)-32,-16)-(bx-(mvb_sz<<2));
-  mvxmax=OD_MINI(bx+(mvb_sz<<2)+32,state->info.frame_width+16)-
-   (bx+(mvb_sz<<2))-1;
-  mvymin=OD_MAXI(by-(mvb_sz<<2)-32,-16)-(by-(mvb_sz<<2));
-  mvymax=OD_MINI(by+(mvb_sz<<2)+32,state->info.frame_height+16)-
-   (by+(mvb_sz<<2))-1;
-  /*fprintf(stderr,"(%i,%i): Search range: [%i,%i]x[%i,%i]\n",
-   bx,by,mvxmin,mvymin,mvxmax,mvymax);*/
-  bx-=mvb_sz<<1;
-  by-=mvb_sz<<1;
-  ncns=4;
-  if(level==0){
-    cneighbors[0]=_est->mvs[_vy-4]+_vx-4;
-    cneighbors[1]=_est->mvs[_vy-4]+_vx;
-    cneighbors[2]=_est->mvs[_vy-4]+_vx+4;
-    cneighbors[3]=_est->mvs[_vy]+_vx-4;
-    pneighbors[0]=_est->mvs[_vy-4]+_vx;
-    pneighbors[1]=_est->mvs[_vy]+_vx-4;
-    pneighbors[2]=_est->mvs[_vy]+_vx+4;
-    pneighbors[3]=_est->mvs[_vy+4]+_vx;
-  }
-  else{
-    if(level&1){
-      pneighbors[0]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-      pneighbors[1]=_est->mvs[_vy-mvb_sz]+_vx+mvb_sz;
-      pneighbors[2]=_est->mvs[_vy+mvb_sz]+_vx-mvb_sz;
-      pneighbors[3]=_est->mvs[_vy+mvb_sz]+_vx+mvb_sz;
-      memcpy(cneighbors,pneighbors,sizeof(cneighbors));
-    }
-    else{
-      pneighbors[0]=_est->mvs[_vy-mvb_sz]+_vx;
-      pneighbors[1]=_est->mvs[_vy]+_vx-mvb_sz;
-      pneighbors[2]=_est->mvs[_vy]+_vx+mvb_sz;
-      pneighbors[3]=_est->mvs[_vy+mvb_sz]+_vx;
-      cneighbors[0]=pneighbors[0];
-      cneighbors[1]=pneighbors[1];
-      /*NOTE: Only one of these candidatss can be excluded at a time, so
-         there will always be at least 3.*/
-      if(_vx+mvb_sz>_vx+4&~3)ncns--;
-      else cneighbors[2]=pneighbors[2];
-      if(_vy+mvb_sz>_vy+4&~3)ncns--;
-      else cneighbors[ncns-1]=pneighbors[3];
-    }
-  }
-  /*Spatially correlated predictors (from the current frame):*/
-  for(ci=0;ci<ncns;ci++){
-    a[ci][0]=cneighbors[ci]->mvs[0][_ref][0];
-    a[ci][1]=cneighbors[ci]->mvs[0][_ref][1];
-    cands[ci][0]=OD_CLAMPI(mvxmin,a[ci][0],mvxmax);
-    cands[ci][1]=OD_CLAMPI(mvymin,a[ci][1],mvymax);
-  }
-  /*Compute the median predictor:*/
-  if(ncns>3){
-    /*Median-of-4.*/
-    OD_SORT2I(a[0][0],a[1][0]);
-    OD_SORT2I(a[0][1],a[1][1]);
-    OD_SORT2I(a[2][0],a[3][0]);
-    OD_SORT2I(a[2][1],a[3][1]);
-    OD_SORT2I(a[0][0],a[2][0]);
-    OD_SORT2I(a[0][1],a[2][1]);
-    OD_SORT2I(a[1][0],a[3][0]);
-    OD_SORT2I(a[1][1],a[3][1]);
-    predx=a[1][0]+a[2][0];
-    predy=a[1][1]+a[2][1];
-    candx=OD_CLAMPI(mvxmin,OD_DIV2(predx),mvxmax);
-    candy=OD_CLAMPI(mvymin,OD_DIV2(predy),mvymax);
-  }
-  else{
-    /*Median-of-3.*/
-    OD_SORT2I(a[0][0],a[1][0]);
-    OD_SORT2I(a[0][1],a[1][1]);
-    OD_SORT2I(a[1][0],a[2][0]);
-    OD_SORT2I(a[1][1],a[2][1]);
-    OD_SORT2I(a[0][0],a[1][0]);
-    OD_SORT2I(a[0][1],a[1][1]);
-    predx=a[1][0]<<1;
-    predy=a[1][1]<<1;
-    candx=OD_CLAMPI(mvxmin,a[1][0],mvxmax);
-    candy=OD_CLAMPI(mvymin,a[1][1],mvymax);
-  }
-  od_mv_est_clear_hit_cache(_est);
-  /*fprintf(stderr,"%p (%i,%i)\n",iplane->data,bx+candx,by+candy);*/
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-  if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-    od_img_draw_line(&state->vis_img,x0,y0,x0+(candx<<1),y0+(candy<<1),
-     OD_YCbCr_MVCAND);
-  }
-#endif
-  best_sad=od_state_sad8(state,
-   iplane->data+(by+candy)*(iplane->ystride<<1)+(bx+candx<<1),
-   iplane->ystride<<1,2,bx,by,log_mvb_sz+2);
-  best_rate=od_mv_est_bits((candx<<1)-predx,(candy<<1)-predy);
-  best_cost=(best_sad<<OD_LAMBDA_SCALE)+best_rate*_est->lambda;
-  /*fprintf(stderr,"Median predictor: (%i,%i)   Error: %i\n",candx,candy,best_err);*/
-  od_mv_est_set_hit(_est,candx,candy);
-  best_vec[0]=candx;
-  best_vec[1]=candy;
-  /*fprintf(stderr,"Threshold: %i\n",OD_YSAD_THRESH1[log_mvb_sz]);*/
-  if(best_sad>OD_YSAD_THRESH1[log_mvb_sz]){
-    ogg_int32_t sad;
-    ogg_int32_t cost;
-    int         rate;
-    /*Compute the early termination threshold for set B.*/
-    t2=mv->bma_sad;
-    for(ci=0;ci<ncns;ci++){
-      int log_cnb_sz;
-      log_cnb_sz=4-OD_MC_LEVEL[cneighbors[ci]->vy&3][cneighbors[ci]->vx&3]>>1;
-      t2=OD_MINI(t2,cneighbors[ci]->bma_sad>>(log_cnb_sz-log_mvb_sz<<1));
-    }
-    t2=t2+(t2>>OD_YSAD_THRESH2_SCALE_BITS)+OD_YSAD_THRESH2_OFFS[log_mvb_sz];
-    /*Constant velocity predictor:*/
-    cands[ncns][0]=OD_CLAMPI(mvxmin,OD_DIV8(mv->mvs[1][_ref][0]),mvxmax);
-    cands[ncns][1]=OD_CLAMPI(mvymin,OD_DIV8(mv->mvs[1][_ref][1]),mvymax);
-    ncns++;
-    /*Zero predictor.*/
-    cands[ncns][0]=0;
-    cands[ncns][1]=0;
-    ncns++;
-    /*Examine the candidates in Set B.*/
-    for(ci=0;ci<ncns;ci++){
-      candx=cands[ci][0];
-      candy=cands[ci][1];
-      /*fprintf(stderr,"Set B predictor %i: (%i,%i) ",ci,candx,candy);*/
-      if(od_mv_est_is_hit(_est,candx,candy)){/*fprintf(stderr,"...Skipping.\n");*/continue;}
-      od_mv_est_set_hit(_est,candx,candy);
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-      if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-        od_img_draw_line(&state->vis_img,x0,y0,x0+(candx<<1),y0+(candy<<1),
-         OD_YCbCr_MVCAND);
-      }
-#endif
-      sad=od_state_sad8(state,
-       iplane->data+(by+candy)*(iplane->ystride<<1)+(bx+candx<<1),
-       iplane->ystride<<1,2,bx,by,log_mvb_sz+2);
-      rate=od_mv_est_bits((candx<<1)-predx,(candy<<1)-predy);
-      cost=(sad<<OD_LAMBDA_SCALE)+rate*_est->lambda;
-      /*fprintf(stderr,"   Error: %i\n",err);*/
-      if(cost<best_cost){
-        best_sad=sad;
-        best_rate=rate;
-        best_cost=cost;
-        best_vec[0]=candx;
-        best_vec[1]=candy;
-      }
-    }
-    /*fprintf(stderr,"Threshold: %i\n",t2);*/
-    if(best_sad>t2){
-      /*Constant velocity predictors from the previous frame:*/
-      for(ci=0;ci<4;ci++){
-        cands[ci][0]=
-         OD_CLAMPI(mvxmin,OD_DIV8(pneighbors[ci]->mvs[1][_ref][0]),mvxmax);
-        cands[ci][1]=
-         OD_CLAMPI(mvymin,OD_DIV8(pneighbors[ci]->mvs[1][_ref][1]),mvymax);
-      }
-      /*The constant acceleration predictor:*/
-      cands[4][0]=OD_CLAMPI(mvxmin,OD_DIV_ROUND_POW2(
-       mv->mvs[1][_ref][0]*_est->mvapw[_ref][0]-
-       mv->mvs[2][_ref][0]*_est->mvapw[_ref][1],16,0x8000),mvxmax);
-      cands[4][1]=OD_CLAMPI(mvymin,OD_DIV_ROUND_POW2(
-       mv->mvs[1][_ref][1]*_est->mvapw[_ref][0]-
-       mv->mvs[2][_ref][1]*_est->mvapw[_ref][1],16,0x8000),mvymax);
-      /*Examine the candidates in Set C.*/
-      for(ci=0;ci<5;ci++){
-        candx=cands[ci][0];
-        candy=cands[ci][1];
-        /*fprintf(stderr,"Set C predictor %i: (%i,%i) ",ci,candx,candy);*/
-        if(od_mv_est_is_hit(_est,candx,candy)){/*fprintf(stderr,"...Skipping.\n");*/continue;}
-        /*if(od_mv_est_is_hit(_est,candx,candy))continue;*/
-        od_mv_est_set_hit(_est,candx,candy);
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-        if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-          od_img_draw_line(&state->vis_img,x0,y0,x0+(candx<<1),y0+(candy<<1),
-           OD_YCbCr_MVCAND);
-        }
-#endif
-        sad=od_state_sad8(state,
-         iplane->data+(by+candy)*(iplane->ystride<<1)+(bx+candx<<1),
-         iplane->ystride<<1,2,bx,by,log_mvb_sz+2);
-        rate=od_mv_est_bits((candx<<1)-predx,(candy<<1)-predy);
-        cost=(sad<<OD_LAMBDA_SCALE)+rate*_est->lambda;
-        /*fprintf(stderr,"   Error: %i\n",err);*/
-        if(cost<best_cost){
-          best_sad=sad;
-          best_rate=rate;
-          best_cost=cost;
-          best_vec[0]=candx;
-          best_vec[1]=candy;
-        }
-      }
-      /*Use the same threshold for Set C as in Set B.*/
-      /*fprintf(stderr,"Threshold: %i\n",t2);*/
-      if(best_sad>t2){
-        int best_site;
-        int nsites;
-        int sitei;
-        int site;
-        int b;
-        /*Square pattern search.*/
-        for(;;){
-          best_site=4;
-          b=(best_vec[0]<=mvxmin)|(best_vec[0]>=mvxmax)<<1|
-           (best_vec[1]<=mvymin)<<2|(best_vec[1]>=mvymax)<<3;
-          nsites=OD_SQUARE_NSITES[b];
-          for(sitei=0;sitei<nsites;sitei++){
-            site=OD_SQUARE_SITES[b][sitei];
-            candx=best_vec[0]+OD_SQUARE_DX[site];
-            candy=best_vec[1]+OD_SQUARE_DY[site];
-            /*fprintf(stderr,"Square search %i: (%i,%i) ",site,candx,candy);*/
-            if(od_mv_est_is_hit(_est,candx,candy)){/*fprintf(stderr,"...Skipping.\n");*/continue;}
-            od_mv_est_set_hit(_est,candx,candy);
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-            if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-              od_img_draw_line(&state->vis_img,x0,y0,
-               x0+(candx<<1),y0+(candy<<1),OD_YCbCr_MVCAND);
-            }
-#endif
-            sad=od_state_sad8(state,
-             iplane->data+(by+candy)*(iplane->ystride<<1)+(bx+candx<<1),
-             iplane->ystride<<1,2,bx,by,log_mvb_sz+2);
-            rate=od_mv_est_bits((candx<<1)-predx,(candy<<1)-predy);
-            cost=(sad<<OD_LAMBDA_SCALE)+rate*_est->lambda;
-            /*fprintf(stderr,"   Error: %i\n",err);*/
-            if(cost<best_cost){
-              best_sad=sad;
-              best_rate=rate;
-              best_cost=cost;
-              best_site=site;
-            }
-          }
-          if(best_site==4)break;
-          best_vec[0]+=OD_SQUARE_DX[best_site];
-          best_vec[1]+=OD_SQUARE_DY[best_site];
-        }
-      }
-    }
-  }
-  /*fprintf(stderr,"Finished. Best vector: (%i,%i)  Best error %i\n",
-   best_vec[0],best_vec[1],best_err);*/
-  mv->mvs[0][_ref][0]=best_vec[0];
-  mv->mvs[0][_ref][1]=best_vec[1];
-  mvg->mv[0]=best_vec[0]<<3;
-  mvg->mv[1]=best_vec[1]<<3;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-  if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-    char             iter_label[16];
-    const od_offset *anc;
-    od_mv_grid_pt   *amvg;
-    int              nanc;
-    int              ai;
-    int              ax;
-    int              ay;
-    mvg->valid=1;
-    nanc=OD_NANCESTORS[_vy&3][_vx&3];
-    anc=OD_ANCESTORS[_vy&3][_vx&3];
-    for(ai=0;ai<nanc;ai++){
-      ax=_vx+anc[ai][0];
-      if(ax<0||ax>(state->nhmbs+1<<2))continue;
-      ay=_vy+anc[ai][1];
-      if(ay<0||ay>(state->nvmbs+1<<2))continue;
-      amvg=state->mv_grid[ay]+ax;
-      amvg->valid=1;
-    }
-    sprintf(iter_label,"ani%08i",state->ani_iter++);
-    od_state_dump_img(state,&state->vis_img,iter_label);
-  }
-#endif
-  mv->bma_sad=best_sad;
-  mv->mv_rate=best_rate;
-  /*od_state_get_predictor(state,a[0],_vx,_vy,level,2);
-  if(a[0][0]!=predx||a[0][1]!=predy){
-    fprintf(stderr,"Failure in MV predictor init: (%i,%i)!=(%i,%i)\n",
-     a[0][0],a[0][1],predx,predy);
-  }
-  mv->mv_rate=od_mv_est_bits((mvg->mv[0]>>2)-a[0][0],(mvg->mv[1]>>2)-a[0][1]);
-  if(mv->mv_rate!=best_rate){
-    fprintf(stderr,"Failure in MV rate init: %i!=%i\n",mv->mv_rate,best_rate);
-  }*/
-}
-
-static void od_mv_est_init_mvs(od_mv_est_ctx *_est,int _ref){
-  od_state *state;
-  int       nhmvbs;
-  int       nvmvbs;
-  int       vx;
-  int       vy;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  /*TODO: Initialize a MVB at a time, for better cache coherency.*/
-  /*Move the motion vector predictors back a frame.*/
-  for(vy=2;vy<=nvmvbs-2;vy++)for(vx=2;vx<=nhmvbs-2;vx++){
-    od_mv_node *mv;
-    mv=_est->mvs[vy]+vx;
-    memmove(mv->mvs+1,mv->mvs+0,sizeof(mv->mvs[0])<<1);
-  }
-  /*Level 0 vertices.*/
-  for(vy=4;vy<nvmvbs;vy+=4){
-    for(vx=4;vx<nhmvbs;vx+=4)od_mv_est_init_mv(_est,_ref,vx,vy);
-  }
-  /*Level 1 vertices.*/
-  if(_est->level_max<1)return;
-  for(vy=2;vy<nvmvbs;vy+=4){
-    for(vx=2;vx<nhmvbs;vx+=4)od_mv_est_init_mv(_est,_ref,vx,vy);
-  }
-  /*Level 2 vertices.*/
-  if(_est->level_max<2)return;
-  for(vy=2;;vy+=2){
-    for(vx=4;vx<nhmvbs;vx+=4)od_mv_est_init_mv(_est,_ref,vx,vy);
-    vy+=2;
-    if(vy>=nvmvbs)break;
-    for(vx=2;vx<nhmvbs;vx+=4)od_mv_est_init_mv(_est,_ref,vx,vy);
-  }
-  /*Level 3 vertices.*/
-  if(_est->level_max<3)return;
-  for(vy=3;vy<nvmvbs-1;vy+=2){
-    for(vx=3;vx<nhmvbs-1;vx+=2)od_mv_est_init_mv(_est,_ref,vx,vy);
-  }
-  /*Level 4 vertices.*/
-  if(_est->level_max<4)return;
-  for(vy=2;;vy++){
-    for(vx=3;vx<nhmvbs-1;vx+=2)od_mv_est_init_mv(_est,_ref,vx,vy);
-    vy++;
-    if(vy>=nvmvbs-1)break;
-    for(vx=2;vx<nhmvbs;vx+=2)od_mv_est_init_mv(_est,_ref,vx,vy);
-  }
-}
-
-
-/*STAGE 2: DECIMATION.*/
-
-
-
-/*Merging domains.
-  These are stored as lists of offsets to the vertices in the domain.
-  Note that vertices in the merging domain must appear in order from finest
-   scale (largest level) to coarsest (smallest level).
-  Each list ends with the vertex (0,0), the actual vertex be decimated.*/
-/*Level 4 vertex:
-            4
-*/
-static const od_offset OD_MERGEDOM4[1]={
-  {0,0},
-};
-
-/*Level 3 vertex:
-            4
-          4-3-4
-            4
-*/
-static const od_offset OD_MERGEDOM3[5]={
-  { 0,-1},{-1, 0},{ 1, 0},{ 0, 1},{ 0, 0}
-};
-
-/*Level 2 vertex:
-          4   4
-          |   |
-        4-3-4-3-4
-          | | |
-          4-2-4
-          | | |
-        4-3-4-3-4
-          |   |
-          4   4
-*/
-static const od_offset OD_MERGEDOM2[17]={
-  {-1,-2},{ 1,-2},{-2,-1},{ 0,-1},{ 2,-1},{-1, 0},{ 1, 0},{-2, 1},
-  { 0, 1},{ 2, 1},{-1, 2},{ 1, 2},{-1,-1},{ 1,-1},{-1, 1},{ 1, 1},
-  { 0, 0}
-};
-
-/*Level 1 vertex:
-          4   4
-          |   |
-        4-3-4-3-4
-          | | |
-      4   4-2-4   4
-      |   | | |   |
-    4-3-4-3-4-3-4-3-4
-      | | | | | | |
-      4-2-4-1-4-2-4
-      | | | | | | |
-    4-3-4-3-4-3-4-3-4
-      |   | | |   |
-      4   4-2-4   4
-          | | |
-        4-3-4-3-4
-          |   |
-          4   4
-*/
-static const od_offset OD_MERGEDOM1[49]={
-  {-1,-4},{ 1,-4},{-2,-3},{ 0,-3},{ 2,-3},{-3,-2},{-1,-2},{ 1,-2},
-  { 3,-2},{-4,-1},{-2,-1},{ 0,-1},{ 2,-1},{ 4,-1},{-3, 0},{-1, 0},
-  { 1, 0},{ 3, 0},{-4, 1},{-2, 1},{ 0, 1},{ 2, 1},{ 4, 1},{-3, 2},
-  {-1, 2},{ 1, 2},{ 3, 2},{-2, 3},{ 0, 3},{ 2, 3},{-1, 4},{ 1, 4},
-  {-1,-3},{ 1,-3},{-3,-1},{-1,-1},{ 1,-1},{ 3,-1},{-3, 1},{-1, 1},
-  { 1, 1},{ 3, 1},{-1, 3},{ 1, 3},{ 0,-2},{-2, 0},{ 2, 0},{ 0, 2},
-  { 0, 0}
-};
-
-/*The merging domain for a vertex, indexed by level-1.*/
-static const od_offset *OD_MERGEDOM[4]={
-  OD_MERGEDOM1,
-  OD_MERGEDOM2,
-  OD_MERGEDOM3,
-  OD_MERGEDOM4
-};
-
-/*Error support regions.
-  These are the blocks whose SAD will change after decimating a vertex at a
-   given level, assuming no other vertices in the mesh have been decimated.
-  Vertices in the figures at a higher level than the one removed illustrate one
-   possible configuration; there may be others.*/
-struct od_mv_err_node{
-  int dx;
-  int dy;
-  int log_mvb_sz;
-};
-
-/*Level 4 support:
-          4-3-4
-          |/|\|
-          2-.-1
-          |\|/|
-          4-3-4
-*/
-static const od_mv_err_node OD_ERRDOM4[4]={
-  {-1,-1,0},{ 0,-1,0},{-1, 0,0},{ 0, 0,0}
-};
-
-/*Level 3 support:
-          4-3-4
-          |/|\|
-        4-0-.-2-4
-        |/|   |\|
-        3-.   .-3
-        |\|   |/|
-        4-2-.-1-4
-          |\|/|
-          4-3-4
-*/
-static const od_mv_err_node OD_ERRDOM3[9]={
-  {-1,-2,0},{ 0,-2,0},{-2,-1,0},{ 1,-1,0},
-  {-2, 0,0},{ 1, 0,0},{-1, 1,0},{ 0, 1,0},
-  {-1,-1,1}
-};
-
-/*Level 2 support:
-        4-3-4-3-4
-        |/|\|/|\|
-      4-2-.-1-.-2-4
-      |/|  /|\  |\|
-      3-. / | \ .-3
-      |\|/  |  \|/|
-      4-0---.---0-4
-      |/|\  |  /|\|
-      3-. \ | / .-3
-      |\|  \|/  |/|
-      4-2-.-1-.-2-4
-        |\|/|\|/|
-        4-3-4-3-4
-*/
-static const od_mv_err_node OD_ERRDOM2[20]={
-  {-2,-3,0},{-1,-3,0},{ 0,-3,0},{ 1,-3,0},
-  {-3,-2,0},{ 2,-2,0},{-3,-1,0},{ 2,-1,0},
-  {-3, 0,0},{ 2, 0,0},{-3, 1,0},{ 2, 1,0},
-  {-2, 2,0},{-1, 2,0},{ 0, 2,0},{ 1, 2,0},
-  {-2,-2,1},{ 0,-2,1},{-2, 0,1},{ 0, 0,1}
-};
-
-/*Level 1 support:
-        4-3-4-3-4
-        |/|\|/|\|
-      4-2-.-1-.-2-4
-      |/|  /|\  |\|
-    4-3-. / | \ .-3-4
-    |/| |/  |  \| |\|
-  4-2-.-0---.---0-.-2-4
-  |/|  /|       |\  |\|
-  3-. / |       | \ .-3
-  |\|/  |       |  \|/|
-  4-1---.       .---1-4
-  |/|\  |       |  /|\|
-  3-. \ |       | / .-3
-  |\|  \|       |/  |/|
-  4-2-.-0---.---0-.-2-4
-    |\| |\  |  /| |/|
-    4-3-. \ | / .-3-4
-      |\|  \|/  |/|
-      4-2-.-1-.-2-4
-        |\|/|\|/|
-        4-3-4-3-4
-*/
-static const od_mv_err_node OD_ERRDOM1[37]={
-  {-2,-5,0},{-1,-5,0},{ 0,-5,0},{ 1,-5,0},
-  {-3,-4,0},{ 2,-4,0},{-4,-3,0},{-3,-3,0},
-  { 2,-3,0},{ 3,-3,0},{-5,-2,0},{ 4,-2,0},
-  {-5,-1,0},{ 4,-1,0},{-5, 0,0},{ 4, 0,0},
-  {-5, 1,0},{ 4, 1,0},{-4, 2,0},{-3, 2,0},
-  { 2, 2,0},{ 3, 2,0},{-3, 3,0},{ 2, 3,0},
-  {-2, 4,0},{-1, 4,0},{ 0, 4,0},{ 1, 4,0},
-  {-2,-4,1},{ 0,-4,1},{-4,-2,1},{ 2,-2,1},
-  {-4, 0,1},{ 2, 0,1},{-2, 2,1},{ 0, 2,1},
-  {-2,-2,2}
-};
-
-/*The number of blocks in each decimated error domain.*/
-static const int OD_NERRDOM[4]={37,20,9,4};
-/*The error domain for a vertex, indexed by level-1.*/
-static const od_mv_err_node *OD_ERRDOM[4]={
-  OD_ERRDOM1,
-  OD_ERRDOM2,
-  OD_ERRDOM3,
-  OD_ERRDOM4
-};
-
-/*Returns -1, 0, or 1, depending if -_dd1/_dr1 is less, equal or greater than
-   -_dd2/_dr2.*/
-static int od_mv_dddr_cmp(ogg_int32_t _dd1,int _dr1,
- ogg_int32_t  _dd2,int _dr2){
-  ogg_int64_t diff;
-  /*dr==0 and dd!=0 should not be possible, but we check for it anyway just in
-     case, to prevent a bug from trashing the whole optimization process.*/
-  if(_dr1==0)return _dr2==0?OD_SIGNI(_dd2-_dd1):_dd1<=0?-1:1;
-  else if(_dr2==0)return _dd2<=0?1:-1;
-  diff=_dd2*(ogg_int64_t)_dr1-_dd1*(ogg_int64_t)_dr2;
-  return OD_SIGNI(diff);
-}
-
-/*Compare two nodes on the decimation heap.*/
-static int od_mv_dec_cmp(od_mv_node *_n1,od_mv_node *_n2){
-  return od_mv_dddr_cmp(_n1->dd,_n1->dr,_n2->dd,_n2->dr);
-}
-
-/*Swap the two nodes on the decimation heap at indices _p and _q.*/
-static void od_mv_dec_heap_swap(od_mv_node **_heap,int _p,int _q){
-  od_mv_node *t;
-  _heap[_p]->heapi=_q;
-  _heap[_q]->heapi=_p;
-  t=_heap[_p];
-  _heap[_p]=_heap[_q];
-  _heap[_q]=t;
-}
-
-/*Convert the list of nodes to be decimated to a heap.*/
-static void od_mv_dec_heapify(od_mv_est_ctx *_est){
-  od_mv_node **heap;
-  int          l;
-  int          r;
-  int          i;
-  heap=_est->dec_heap;
-  l=_est->dec_nheap>>1;
-  r=_est->dec_nheap-1;
-  for(i=l;i-->0;){
-    int p;
-    p=i;
-    do{
-      int q;
-      q=(p<<1)+1;
-      if(q<r&&od_mv_dec_cmp(heap[q],heap[q+1])>=0)q++;
-      if(od_mv_dec_cmp(heap[p],heap[q])<=0)break;
-      od_mv_dec_heap_swap(heap,p,q);
-      p=q;
-    }
-    while(p<l);
-  }
-}
-
-/*Restore the heap structure at the given index by moving it down the heap.*/
-static void od_mv_dec_heap_down(od_mv_est_ctx *_est,int _heapi){
-  od_mv_node **heap;
-  int          l;
-  int          r;
-  int          p;
-  heap=_est->dec_heap;
-  l=_est->dec_nheap>>1;
-  r=_est->dec_nheap-1;
-  p=_heapi;
-  while(p<l){
-    int q;
-    q=(p<<1)+1;
-    if(q<r&&od_mv_dec_cmp(heap[q],heap[q+1])>=0)q++;
-    if(od_mv_dec_cmp(heap[p],heap[q])<=0)break;
-    od_mv_dec_heap_swap(heap,p,q);
-    p=q;
-  }
-}
-
-/*Restore the heap structure at the given index by moving it up the heap.*/
-static void od_mv_dec_heap_up(od_mv_est_ctx *_est,int _heapi){
-  od_mv_node **heap;
-  int          p;
-  heap=_est->dec_heap;
-  p=_heapi;
-  while(p>0){
-    int q;
-    q=p;
-    p=(q+1>>1)-1;
-    if(od_mv_dec_cmp(heap[p],heap[q])<=0)break;
-    od_mv_dec_heap_swap(heap,p,q);
-  }
-}
-
-/*Retrieve the item at the top of the heap.
-  Returns NULL if there are no more nodes to decimate.*/
-static od_mv_node *od_mv_dec_heap_delhead(od_mv_est_ctx *_est){
-  od_mv_node *ret;
-  if(_est->dec_nheap<=0)return NULL;
-  ret=_est->dec_heap[0];
-  ret->heapi=-1;
-  if(--_est->dec_nheap>0){
-    _est->dec_heap[0]=_est->dec_heap[_est->dec_nheap];
-    _est->dec_heap[0]->heapi=0;
-    od_mv_dec_heap_down(_est,0);
-  }
-  return ret;
-}
-
-static void od_mv_dec_heap_del(od_mv_est_ctx *_est,od_mv_node *_node){
-  int heapi;
-  heapi=_node->heapi;
-  if(heapi>=0){
-    _node->heapi=-1;
-    _est->dec_nheap--;
-    if(_est->dec_nheap>heapi){
-      _est->dec_heap[heapi]=_est->dec_heap[_est->dec_nheap];
-      _est->dec_heap[heapi]->heapi=heapi;
-      if(od_mv_dec_cmp(_node,_est->dec_heap[heapi])>=0){
-        od_mv_dec_heap_up(_est,heapi);
-      }
-      else od_mv_dec_heap_down(_est,heapi);
-    }
-    else _est->dec_heap[_est->dec_nheap]=NULL;
-  }
-}
-
-/*Sets the dd and dr values of the given node, restoring the heap structure
-   afterwards.*/
-static void od_mv_dec_update(od_mv_est_ctx *_est,od_mv_node *_node,
- int _dd,int _dr){
-  int diff;
-  diff=od_mv_dddr_cmp(_dd,_dr,_node->dd,_node->dr);
-  _node->dd=_dd;
-  _node->dr=_dr;
-  if(_node->heapi>=0){
-    if(diff<=0)od_mv_dec_heap_up(_est,_node->heapi);
-    else od_mv_dec_heap_down(_est,_node->heapi);
-  }
-}
-
-static void od_mv_est_init_nodes(od_mv_est_ctx *_est){
-  od_state      *state;
-  od_mv_node    *mv_row;
-  od_mv_grid_pt *grid;
-  int            nhmvbs;
-  int            nvmvbs;
-  int            etype;
-  int            ebits;
-  int            vx;
-  int            vy;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  if(_est->flags&OD_MC_USEV){
-    if(_est->flags&OD_MC_USEB){
-      etype=0;
-      ebits=3;
-    }
-    else{
-      etype=1;
-      ebits=0;
-    }
-  }
-  else etype=ebits=0;
-  for(vy=0;vy<=nvmvbs;vy++){
-    mv_row=_est->mvs[vy];
-    grid=state->mv_grid[vy];
-    for(vx=0;vx<=nhmvbs;vx++){
-      int level;
-      level=OD_MC_LEVEL[vy&3][vx&3];
-      if(level<=_est->level_max){
-        /*While we're here, reset the MV state.*/
-        grid[vx].valid=1;
-        grid[vx].right=etype;
-        grid[vx].down=etype;
-        _est->col_counts[vx]++;
-        _est->row_counts[vy]++;
-        /*Except at the lowest level, vertices require on average 2 bits to
-           indicate the presence of children.*/
-        /*Vertices on even levels require new edge labels.*/
-        mv_row[vx].dr=-(mv_row[vx].mv_rate+((level<4)<<1)+((level&1)-1&ebits));
-      }
-      else grid[vx].valid=0;
-    }
-  }
-}
-
-/*Computes the SAD of all blocks at all scales with all possible edge
-   splittings, using OBMC.
-  These are what will drive the error of the adaptive subdivision process.*/
-static void od_mv_est_calc_sads(od_mv_est_ctx *_est,int _ref){
-  od_state *state;
-  int       nhmvbs;
-  int       nvmvbs;
-  int       vx;
-  int       vy;
-  int       c;
-  int       s;
-  state=&_est->enc->state;
-  /*TODO: Interleaved evaluation would probably provide better cache
-     coherency.*/
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  if(_est->level_max>=3){
-    for(vy=0;vy<nvmvbs;vy++){
-      od_mv_node *mv_row;
-      mv_row=_est->mvs[vy];
-      for(vx=0;vx<nhmvbs;vx++){
-        c=(vx&1)^((vy&1)<<1|vy&1);
-        /*While we're here, fill in the block's setup sate.*/
-        mv_row[vx].c=c;
-        mv_row[vx].log_mvb_sz=0;
-        if(_est->level_max>=4){
-          for(s=0;s<4;s++){
-            _est->sad_cache[0][vy][vx][s]=
-            (ogg_uint16_t)od_mv_est_sad8(_est,_ref,vx,vy,c,s,0);
-          }
-          mv_row[vx].s=3;
-          mv_row[vx].sad=_est->sad_cache[0][vy][vx][3];
-        }
-        else{
-          mv_row[vx].s=0;
-          mv_row[vx].sad=od_mv_est_sad8(_est,_ref,vx,vy,c,0,0);
-        }
-      }
-    }
-  }
-  nhmvbs>>=1;
-  nvmvbs>>=1;
-  if(_est->level_max>=1){
-    for(vy=0;vy<nvmvbs;vy++){
-      od_mv_node *mv_row;
-      mv_row=_est->mvs[vy<<1];
-      for(vx=0;vx<nhmvbs;vx++){
-        c=(vx&1)^((vy&1)<<1|vy&1);
-        if(_est->level_max>=2){
-          for(s=0;s<4;s++){
-            _est->sad_cache[1][vy][vx][s]=
-             (ogg_uint16_t)od_mv_est_sad8(_est,_ref,vx<<1,vy<<1,c,s,1);
-          }
-          if(_est->level_max==2){
-            mv_row[vx<<1].c=c;
-            mv_row[vx<<1].s=3;
-            mv_row[vx<<1].log_mvb_sz=1;
-            mv_row[vx<<1].sad=_est->sad_cache[1][vy][vx][3];
-          }
-        }
-        else{
-          mv_row[vx<<1].c=c;
-          mv_row[vx<<1].s=0;
-          mv_row[vx<<1].log_mvb_sz=1;
-          mv_row[vx<<1].sad=od_mv_est_sad8(_est,_ref,vx<<1,vy<<1,c,0,1);
-        }
-      }
-    }
-  }
-  else{
-    nhmvbs>>=1;
-    nvmvbs>>=1;
-    for(vy=0;vy<nvmvbs;vy++){
-      od_mv_node *mv_row;
-      mv_row=_est->mvs[vy<<2];
-      for(vx=0;vx<nhmvbs;vx++){
-        mv_row[vx<<2].c=0;
-        mv_row[vx<<2].s=3;
-        mv_row[vx<<2].log_mvb_sz=2;
-        mv_row[vx<<2].sad=od_mv_est_sad8(_est,_ref,vx<<2,vy<<2,0,3,2);
-      }
-    }
-  }
-}
-
-static void od_mv_est_init_du(od_mv_est_ctx *_est,int _ref,int _vx,int _vy){
-  od_state             *state;
-  od_mv_node           *dec;
-  od_mv_node           *merge;
-  const od_mv_err_node *errdom;
-  int                   nerrdom;
-  const od_offset      *mergedom;
-  int                   nhmvbs;
-  int                   nvmvbs;
-  int                   level;
-  int                   undecs;
-  int                   dlev;
-  int                   di;
-  int                   vx;
-  int                   vy;
-  int                   dx;
-  int                   dy;
-  /*fprintf(stderr,"Computing du's for (%i,%i)\n",_vx,_vy);*/
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  dec=_est->mvs[_vy]+_vx;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  dlev=_est->level_max<=2;
-  undecs=_est->level_max&1?0:3;
-  errdom=OD_ERRDOM[level-1+(dlev<<1)];
-  nerrdom=OD_NERRDOM[level-1+(dlev<<1)];
-  mergedom=OD_MERGEDOM[level-1+(dlev<<1)];
-  dec->dd=0;
-  /*Subtract off the error before decimation.*/
-  for(di=0;di<nerrdom;di++){
-    vx=_vx+(errdom[di].dx<<dlev);
-    vy=_vy+(errdom[di].dy<<dlev);
-    if(vx>=0&&vy>=0&&vx<nhmvbs&&vy<nvmvbs){
-      int mvb_sz;
-      mvb_sz=1<<errdom[di].log_mvb_sz;
-      for(dy=0;dy<mvb_sz;dy++){
-        for(dx=0;dx<mvb_sz;dx++){
-          dec->dd-=_est->sad_cache[dlev][(vy>>dlev)+dy][(vx>>dlev)+dx][undecs];
-          /*fprintf(stderr,"Added error (%i,%i) [%ix%i]: %i\n",
-           vx+(dx<<dlev),vy+(dy<<dlev),4<<dlev,4<<dlev,dec->dd);*/
-        }
-      }
-    }
-    /*else fprintf(stderr,"(%i,%i) outside [%i,%i]x[%i,%i]\n",vx,vy,0,0,nhmvbs,nvmvbs);*/
-  }
-  /*fprintf(stderr,"Subtracted initial error: %i\n",dec->dd);*/
-  /*Decimate the vertices in the merging domain.
-    Also sum up the rate changes while we do it.*/
-  for(di=0;;di++){
-    vx=_vx+(mergedom[di][0]<<dlev);
-    if(vx<0||vx>nhmvbs)continue;
-    vy=_vy+(mergedom[di][1]<<dlev);
-    if(vy<0||vy>nvmvbs)continue;
-    if(OD_MC_LEVEL[vy&3][vx&3]>_est->level_max)continue;
-    state->mv_grid[vy][vx].valid=0;
-    merge=_est->mvs[vy]+vx;
-    if(merge==dec)break;
-    dec->dr+=merge->dr;
-    /*fprintf(stderr,"Merged vertex (%2i,%2i), dr: %i\n",vx,vy,dec->dr);*/
-  }
-  /*fprintf(stderr,"Merged vertex (%2i,%2i)\n",vx,vy);*/
-  /*fprintf(stderr,"Decimated vertices in merging domain.\n");*/
-  /*Add in the error after decimation.*/
-  for(di=0;di<nerrdom;di++){
-    vx=_vx+(errdom[di].dx<<dlev);
-    vy=_vy+(errdom[di].dy<<dlev);
-    if(vx>=0&&vy>=0&&vx<nhmvbs&&vy<nvmvbs){
-      int log_mvb_sz;
-      log_mvb_sz=errdom[di].log_mvb_sz+dlev;
-      if(log_mvb_sz<2){
-        int mask;
-        int c;
-        int s;
-        mask=(1<<log_mvb_sz+1)-1;
-        c=!!(vx&mask);
-        if(vy&mask)c=3-c;
-        s=state->mv_grid[vy+(OD_VERT_DY[c+1&3]<<log_mvb_sz)][
-         vx+(OD_VERT_DX[c+1&3]<<log_mvb_sz)].valid|
-         state->mv_grid[vy+(OD_VERT_DY[c+3&3]<<log_mvb_sz)][
-         vx+(OD_VERT_DX[c+3&3]<<log_mvb_sz)].valid<<1;
-        dec->dd+=
-         _est->sad_cache[log_mvb_sz][vy>>log_mvb_sz][vx>>log_mvb_sz][s];
-        /*fprintf(stderr,"Added error (%i,%i) [%ix%i] {%i,%i}: %i\n",vx,vy,1<<log_mvb_sz+2,1<<log_mvb_sz+2,c,s,dec->dd);*/
-      }
-      else{
-        /*Cache the SAD for top-level blocks in the dd field, which is
-           otherwise unused (since they cannot be decimated).*/
-        _est->mvs[vy][vx].dd=od_mv_est_sad8(_est,_ref,vx,vy,0,3,2);
-        dec->dd+=_est->mvs[vy][vx].dd;
-        /*fprintf(stderr,"Added error (%i,%i) [%ix%i]: %i\n",
-         vx,vy,1<<log_mvb_sz+2,1<<log_mvb_sz+2,dec->dd);*/
-      }
-    }
-  }
-  /*fprintf(stderr,"Total merging error: %i\n",dec->dd);*/
-  /*Restore the vertices in the merging domain.*/
-  for(di=0;;di++){
-    vx=_vx+(mergedom[di][0]<<dlev);
-    if(vx<0||vx>nhmvbs)continue;
-    vy=_vy+(mergedom[di][1]<<dlev);
-    if(vy<0||vy>nvmvbs)continue;
-    if(OD_MC_LEVEL[vy&3][vx&3]>_est->level_max)continue;
-    state->mv_grid[vy][vx].valid=1;
-    if(vx==_vx&&vy==_vy)break;
-  }
-  /*fprintf(stderr,"Restored vertices in merging domain.\n");*/
-  /*Add this node to the heap.*/
-  dec->heapi=_est->dec_nheap;
-  _est->dec_heap[_est->dec_nheap++]=dec;
-}
-/*Prepares the decimation state for the whole MV grid.
-  The column and row counts are zeroed, the nodes are set up with
-   od_mv_est_init_nodes(), the SADs are computed with od_mv_est_calc_sads(),
-   and the merge heap is emptied.
-  od_mv_est_init_du() is then called for each vertex of every level from 1 up
-   to _est->level_max, and od_mv_dec_heapify() is run on the result.
-  nhmvbs and nvmvbs are (nhmbs+1)<<2 and (nvmbs+1)<<2, since + binds tighter
-   than <<, and so are always multiples of 4.
-  NOTE(review): the row_counts clear below is sized with
-   sizeof(_est->col_counts[0]); that is only right if both arrays share an
-   element type - confirm against the declarations.
-  _est: The motion estimation context.
-  _ref: Passed through to od_mv_est_calc_sads() and od_mv_est_init_du().*/
-static void od_mv_est_init_dus(od_mv_est_ctx *_est,int _ref){
-  od_state *state;
-  int       nhmvbs;
-  int       nvmvbs;
-  int       vx;
-  int       vy;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  memset(_est->col_counts,0,sizeof(_est->col_counts[0])*(nhmvbs+1));
-  memset(_est->row_counts,0,sizeof(_est->col_counts[0])*(nvmvbs+1));
-  od_mv_est_init_nodes(_est);
-  /*fprintf(stderr,"Finished MV bits.\n");*/
-  od_mv_est_calc_sads(_est,_ref);
-  /*fprintf(stderr,"Finished SADs.\n");*/
-  /*Clear the merge heap.*/
-  _est->dec_nheap=0;
-  _est->dec_heap[0]=NULL;
-  /*The initialization is destructive to dr, and so must proceed by level from
-     top to bottom.*/
-  if(_est->level_max>=1){
-    /*Level 1 vertices.*/
-    for(vy=2;vy<=nvmvbs;vy+=4){
-      for(vx=2;vx<=nhmvbs;vx+=4)od_mv_est_init_du(_est,_ref,vx,vy);
-    }
-    if(_est->level_max>=2){
-      /*Level 2 vertices.
-        Each pass handles two rows: one with vertices at vx=2,6,..., then one
-         with vertices at vx=0,4,....
-        The exit test sits between the two, so the last row visited (at
-         vy==nvmvbs) is of the first kind.*/
-      for(vy=0;;vy+=2){
-        for(vx=2;vx<=nhmvbs;vx+=4)od_mv_est_init_du(_est,_ref,vx,vy);
-        vy+=2;
-        if(vy>nvmvbs)break;
-        for(vx=0;vx<=nhmvbs;vx+=4)od_mv_est_init_du(_est,_ref,vx,vy);
-      }
-      if(_est->level_max>=3){
-        /*Level 3 vertices.*/
-        for(vy=1;vy<=nvmvbs;vy+=2){
-          for(vx=1;vx<=nhmvbs;vx+=2)od_mv_est_init_du(_est,_ref,vx,vy);
-        }
-        if(_est->level_max>=4){
-          /*Level 4 vertices.
-            As for level 2, rows alternate: even rows have vertices at
-             vx=1,3,..., and odd rows have vertices at vx=0,2,....*/
-          for(vy=0;;vy++){
-            for(vx=1;vx<=nhmvbs;vx+=2)od_mv_est_init_du(_est,_ref,vx,vy);
-            vy++;
-            if(vy>nvmvbs)break;
-            for(vx=0;vx<=nhmvbs;vx+=2)od_mv_est_init_du(_est,_ref,vx,vy);
-          }
-        }
-      }
-    }
-  }
-  /*Make the node list into a proper heap.*/
-  od_mv_dec_heapify(_est);
-}
-/*Greedily decimates the MV grid.
-  After od_mv_est_init_dus() builds the heap, the head node is repeatedly
-   removed and every still-valid vertex in its merging domain is decimated,
-   until the heap is empty or the head's dr*lambda+(dd<<OD_LAMBDA_SCALE) is
-   positive.
-  For each decimated vertex, the dd and dr of its ancestors are updated, the
-   vertex is marked invalid in the MV grid and removed from the heap, its
-   column and row counts are decremented, and the fields of the affected
-   blocks (s and sad, or log_mvb_sz, c, s, and sad) are refreshed.
-  od_mv_est_check_rd_state() is called before and after the main loop.
-  _est: The motion estimation context.
-  _ref: Passed through to od_mv_est_init_dus(), od_mv_est_check_rd_state(),
-         and, in animation builds, od_state_mc_predict().*/
-static void od_mv_est_decimate(od_mv_est_ctx *_est,int _ref){
-  od_mv_node *dec;
-  od_state   *state;
-  int         nhmvbs;
-  int         nvmvbs;
-  int         dlev;
-  int         vx;
-  int         vy;
-  od_mv_est_init_dus(_est,_ref);
-  od_mv_est_check_rd_state(_est,_ref,2);
-  /*fprintf(stderr,"%i %i %i %i\n",
-   _est->sad_cache[0][30][2][0],
-   _est->sad_cache[0][30][2][1],
-   _est->sad_cache[0][30][2][2],
-   _est->sad_cache[0][30][2][3]);
-  fprintf(stderr,"%i %i %i %i\n",
-   _est->sad_cache[0][31][2][0],
-   _est->sad_cache[0][31][2][1],
-   _est->sad_cache[0][31][2][2],
-   _est->sad_cache[0][31][2][3]);*/
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  dlev=_est->level_max<=2;
-  for(;;){
-    const od_offset *mergedom;
-    int              level;
-    int              di;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-    if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-      char iter_label[16];
-      od_state_mc_predict(state,_ref);
-      od_state_fill_vis(state);
-      sprintf(iter_label,"ani%08i",state->ani_iter++);
-      od_state_dump_img(state,&state->vis_img,iter_label);
-    }
-#endif
-    dec=od_mv_dec_heap_delhead(_est);
-    /*Stop if we've fully decimated the mesh, or if this decimation would not
-       improve R-D performance at the current lambda.*/
-    if(dec==NULL||dec->dr*_est->lambda+(dec->dd<<OD_LAMBDA_SCALE)>0)break;
-    level=OD_MC_LEVEL[dec->vy&3][dec->vx&3];
-    /*fprintf(stderr,"Iteration %i; Merging node (%2i,%2i), level %i, dd %5i, dr %5i, dopt %5i:\n",
-     iteration,dec->vx,dec->vy,level,dec->dd,dec->dr,
-     dec->dr*_est->lambda+(dec->dd<<OD_LAMBDA_SCALE));*/
-    mergedom=OD_MERGEDOM[level-1+(dlev<<1)];
-    for(di=0;;di++){
-      od_mv_node      *merge;
-      od_mv_node      *ancestor;
-      od_mv_node      *block;
-      const od_offset *anc;
-      int              nanc;
-      int              ai;
-      int              ax;
-      int              ay;
-      int              bx;
-      int              by;
-      int              log_mvb_sz;
-      int              mask;
-      /*Don't decimate vertices outside of the mesh.*/
-      vx=dec->vx+(mergedom[di][0]<<dlev);
-      if(vx<0||vx>nhmvbs)continue;
-      vy=dec->vy+(mergedom[di][1]<<dlev);
-      if(vy<0||vy>nvmvbs)continue;
-      merge=_est->mvs[vy]+vx;
-      /*Don't decimate vertices that have already been decimated.*/
-      if(!state->mv_grid[vy][vx].valid){/*fprintf(stderr,"Skipping node (%i,%i) (already merged).\n",vx,vy);*/continue;}
-      /*fprintf(stderr,"Merging node (%2i,%2i), dd %5i, dr %5i:\n",vx,vy,
-       merge->dd,merge->dr);*/
-      /*Update the deltas for this vertex in the merging domain.
-        The simple rule applied below handles overlapped domains with an
-         inclusion-exclusion approach.
-        See Balmelli 2001 for details.*/
-      nanc=OD_NANCESTORS[vy&3][vx&3];
-      anc=OD_ANCESTORS[vy&3][vx&3];
-      for(ai=0;ai<nanc;ai++){
-        ax=vx+anc[ai][0];
-        if(ax<0||ax>nhmvbs)continue;
-        ay=vy+anc[ai][1];
-        if(ay<0||ay>nvmvbs)continue;
-        ancestor=_est->mvs[ay]+ax;
-        od_mv_dec_update(_est,ancestor,
-         ancestor->dd-merge->dd,ancestor->dr-merge->dr);
-        /*fprintf(stderr,"Updated ancestor (%2i,%2i) of (%2i,%2i): dd %5i, dr %5i\n",
-         ax,ay,vx,vy,ancestor->dd,ancestor->dr);*/
-      }
-      state->mv_grid[vy][vx].valid=0;
-      od_mv_dec_heap_del(_est,merge);
-      _est->col_counts[vx]--;
-      _est->row_counts[vy]--;
-      level=OD_MC_LEVEL[vy&3][vx&3];
-      log_mvb_sz=4-level>>1;
-      /*Account for quadrilaterals which may have only partially belonged to
-         the merging domain (e.g., that would not have belonged were we using
-         triangles).*/
-      if(!(level&1)){
-        static const int OD_CDX[4]={-1,1,-1,1};
-        static const int OD_CDY[4]={-1,-1,1,1};
-        int k;
-        mask=(1<<log_mvb_sz+1)-1;
-        for(k=0;k<4;k++){
-          int cx;
-          int cy;
-          int ddd;
-          int s;
-          cx=vx+(OD_CDX[k]<<log_mvb_sz);
-          if(cx<0||cx>nhmvbs)continue;
-          cy=vy+(OD_CDY[k]<<log_mvb_sz);
-          if(cy<0||cy>nvmvbs)continue;
-          bx=vx+(OD_ERRDOM4[k].dx<<log_mvb_sz);
-          by=vy+(OD_ERRDOM4[k].dy<<log_mvb_sz);
-          block=_est->mvs[by]+bx;
-          by>>=log_mvb_sz;
-          bx>>=log_mvb_sz;
-          if(!state->mv_grid[cy][cx].valid){
-            block->s=0;
-            block->sad=_est->sad_cache[log_mvb_sz][by][bx][0];
-            /*If the opposing corner has already been decimated, the remaining
-               adjustments have already been made.*/
-            continue;
-          }
-          /*s is the split state of the error block with (vx,vy) decimated, and
-             (cx,cy) undecimated.*/
-          s=1<<(((k+3&3)>>1)^!!(vx&mask));
-          block->s=s;
-          block->sad=_est->sad_cache[log_mvb_sz][by][bx][s];
-          /*Replace the old decimation error change with the new one.*/
-          ddd=_est->sad_cache[log_mvb_sz][by][bx][0]-
-           _est->sad_cache[log_mvb_sz][by][bx][s^3]+
-           _est->sad_cache[log_mvb_sz][by][bx][3]-
-           _est->sad_cache[log_mvb_sz][by][bx][s];
-          /*fprintf(stderr,"Checking opposing corner (%2i,%2i): ddd %i\n",
-           cx,cy,ddd);*/
-          /*This happens in regions of constant motion.*/
-          if(ddd==0)continue;
-          ancestor=_est->mvs[cy]+cx;
-          od_mv_dec_update(_est,ancestor,ancestor->dd+ddd,ancestor->dr);
-          /*fprintf(stderr,"Updated corner (%2i,%2i): dd %5i, dr %5i\n",
-           cx,cy,ancestor->dd,ancestor->dr);*/
-          /*Update the opposing corner's ancestors, which also, of
-             necessity, must contain the affected quadrilateral, and must
-             not have been decimated yet.*/
-          nanc=OD_NANCESTORS[cy&3][cx&3];
-          anc=OD_ANCESTORS[cy&3][cx&3];
-          for(ai=0;ai<nanc;ai++){
-            ax=cx+anc[ai][0];
-            if(ax<0||ax>nhmvbs)continue;
-            ay=cy+anc[ai][1];
-            if(ay<0||ay>nvmvbs)continue;
-            ancestor=_est->mvs[ay]+ax;
-            od_mv_dec_update(_est,ancestor,ancestor->dd+ddd,ancestor->dr);
-            /*fprintf(stderr,"Updated ancestor (%2i,%2i): dd %5i, dr %5i\n",
-             ax,ay,ancestor->dd,ancestor->dr);*/
-          }
-          /*Add back in the components that do not apply to the interior
-             corner.*/
-          ddd=-ddd;
-          if(vx&mask)cx=vx;
-          else cy=vy;
-          /*fprintf(stderr,"Checking interior corner (%2i,%2i): ddd %i\n",
-           cx,cy,ddd);*/
-          ancestor=_est->mvs[cy]+cx;
-          od_mv_dec_update(_est,ancestor,ancestor->dd+ddd,ancestor->dr);
-          /*fprintf(stderr,"Updated corner (%2i,%2i): dd %5i, dr %5i\n",
-           cx,cy,ancestor->dd,ancestor->dr);*/
-          /*And update all the interior corner's ancestors, which also, of
-             necessity, must contain the affected quadrilateral, and must not
-             have been decimated yet.*/
-          nanc=OD_NANCESTORS[cy&3][cx&3];
-          anc=OD_ANCESTORS[cy&3][cx&3];
-          for(ai=0;ai<nanc;ai++){
-            ax=cx+anc[ai][0];
-            if(ax<0||ax>nhmvbs)continue;
-            ay=cy+anc[ai][1];
-            if(ay<0||ay>nvmvbs)continue;
-            ancestor=_est->mvs[ay]+ax;
-            od_mv_dec_update(_est,ancestor,ancestor->dd+ddd,ancestor->dr);
-            /*fprintf(stderr,"Updated ancestor (%2i,%2i): dd %5i, dr %5i\n",
-             ax,ay,ancestor->dd,ancestor->dr);*/
-          }
-        }
-      }
-      /*Otherwise, we eliminated several smaller blocks.
-        Update the SAD and block setup for the larger block that took their
-         place.*/
-      else{
-        int c;
-        bx=vx-(1<<log_mvb_sz);
-        by=vy-(1<<log_mvb_sz);
-        log_mvb_sz++;
-        mask=(1<<log_mvb_sz+1)-1;
-        c=!!(bx&mask);
-        if(by&mask)c=3-c;
-        block=_est->mvs[by]+bx;
-        block->log_mvb_sz=log_mvb_sz;
-        block->c=c;
-        block->s=3;
-        if(log_mvb_sz<2){
-          block->sad=
-           _est->sad_cache[log_mvb_sz][by>>log_mvb_sz][bx>>log_mvb_sz][3];
-        }
-        /*At the top level, we cached the SAD in the dd field.*/
-        else block->sad=block->dd;
-      }
-      /*If we just decimated our target vertex, stop.*/
-      if(merge==dec)break;
-    }
-  }
-  od_mv_est_check_rd_state(_est,_ref,2);
-  /*fprintf(stderr,"Finished merging.\n");*/
-  /*if(dec!=NULL){
-    fprintf(stderr,"Node (%i,%i) dd %i, dr %i, dopt %i: not enough.\n",
-     dec->vx,dec->vy,dec->dd,dec->dr,
-     dec->dr*_est->lambda+(dec->dd<<OD_LAMBDA_SCALE));
-  }*/
-  /*if(state->mv_grid[31][1].valid){
-    dec=_est->mvs[31]+1;
-    fprintf(stderr,"(%i,%i) remains. dd: %5i, dr: %2i, dopt: %6i.\n",
-     dec->vx,dec->vy,dec->dd,dec->dr,
-     dec->dr*_est->lambda+(dec->dd<<OD_LAMBDA_SCALE));
-  }*/
-}
-
-
-
-/*STAGE 3: Iterated Dynamic Programming.*/
-
-
-
-/*The list of MVs that can be predicted by a level 0 MV, excluding those not
-   yet considered by DP across rows.
-  Each entry is an {x,y} offset from the predicting MV's grid position.*/
-static const od_offset OD_ROW_PREDICTED0[17]={
-  /*These predicted MVs are changeable by future MVs in the DP path.*/
-  { 2,-2},{ 1,-1},{ 2, 2},{ 1, 1},{ 0, 4},{ 4, 4},
-  /*The remaining ones are not.*/
-  {-2,-2},{ 0,-2},{-1,-1},{ 0,-1},{-1, 0},{-2, 0},
-  {-1, 1},{ 0, 1},{-2, 2},{ 0, 2},{-4, 4}
-};
-/*The list of MVs that can be predicted by a level 1 MV, excluding those
-   not yet considered by DP across rows.*/
-static const od_offset OD_ROW_PREDICTED1[10]={
-  /*These predicted MVs are changeable by future MVs in the DP path.*/
-  { 1,-1},{ 1, 1},
-  /*The remaining ones are not.*/
-  { 0,-2},{-1,-1},{ 0,-1},{-2, 0},{-1, 0},{-1, 1},{ 0, 1},{ 0, 2}
-};
-/*The list of MVs that can be predicted by a level 2 MV, excluding those
-   not yet considered by DP across rows.*/
-static const od_offset OD_ROW_PREDICTED2[7]={
-  /*These predicted MVs are changeable by future MVs in the DP path.*/
-  { 1,-1},{ 1, 1},
-  /*The remaining ones are not.*/
-  {-1,-1},{ 0,-1},{-1, 0},{-1, 1},{ 0, 1}
-};
-/*The list of MVs that can be predicted by a level 3 MV, excluding those
-   not yet considered by DP across rows.*/
-static const od_offset OD_ROW_PREDICTED3[3]={
-  /*These predicted MVs are NOT changeable by future MVs in the DP path.*/
-  { 0,-1},{-1, 0},{ 0, 1}
-};
-
-/*The list of MVs that can be predicted by a level 0 MV, excluding those not
-   yet considered by DP across columns.
-  Each entry is an {x,y} offset, laid out like the row lists above.*/
-static const od_offset OD_COL_PREDICTED0[17]={
-  /*These predicted MVs are changeable by future MVs in the DP path.*/
-  { 2, 2},{-2, 2},{-1, 1},{ 1, 1},{ 4, 4},
-  /*The remaining ones are not.*/
-  {-2,-2},{ 0,-2},{ 2,-2},{-1,-1},{ 0,-1},{ 1,-1},
-  {-2, 0},{-1, 0},{ 1, 0},{ 2, 0},{ 4, 0},{-4, 4}
-};
-/*The list of MVs that can be predicted by a level 1 MV, excluding those
-   not yet considered by DP across columns.*/
-static const od_offset OD_COL_PREDICTED1[10]={
-  /*These predicted MVs are changeable by future MVs in the DP path.*/
-  {-1, 1},{ 1, 1},
-  /*The remaining ones are not.*/
-  { 0,-2},{-1,-1},{ 0,-1},{ 1,-1},{-2, 0},{-1, 0},{ 1, 0},{ 2, 0}
-};
-/*The list of MVs that can be predicted by a level 2 MV, excluding those
-   not yet considered by DP across columns.*/
-static const od_offset OD_COL_PREDICTED2[7]={
-  /*These predicted MVs are changeable by future MVs in the DP path.*/
-  {-1, 1},{ 1, 1},
-  /*The remaining ones are not.*/
-  {-1,-1},{ 0,-1},{ 1,-1},{-1, 0},{ 1, 0}
-};
-/*The list of MVs that can be predicted by a level 3 MV, excluding those
-   not yet considered by DP across columns.*/
-static const od_offset OD_COL_PREDICTED3[3]={
-  /*These predicted MVs are NOT changeable by future MVs in the DP path.*/
-  { 0,-1},{-1, 0},{ 1, 0}
-};
-
-/*The number of predicted MVs in each list, indexed by level.
-  The row and column lists for a given level have the same length, so one
-   table serves both.*/
-static const int OD_NPREDICTED[5]={17,10,7,3,0};
-/*The number of changeable predicted MVs in each row list, indexed by level.
-  The changeable entries are the leading ones in each list.*/
-static const int OD_NROW_PRED_CHANGEABLE[4]={6,2,2,0};
-/*The number of changeable predicted MVs in each column list, indexed by
-   level.
-  The changeable entries are the leading ones in each list.*/
-static const int OD_NCOL_PRED_CHANGEABLE[4]={5,2,2,0};
-/*The lists of offsets to predicted MVs for each level, for row refinement.*/
-static const od_offset *const OD_ROW_PREDICTED[4]={
-  OD_ROW_PREDICTED0,
-  OD_ROW_PREDICTED1,
-  OD_ROW_PREDICTED2,
-  OD_ROW_PREDICTED3
-};
-/*The lists of offsets to predicted MVs for each level, for column
-   refinement.*/
-static const od_offset *const OD_COL_PREDICTED[4]={
-  OD_COL_PREDICTED0,
-  OD_COL_PREDICTED1,
-  OD_COL_PREDICTED2,
-  OD_COL_PREDICTED3
-};
-
-/*The amount of history to restore in the trellis state to ensure predicted MVs
-   are evaluated correctly in row refinement.
-  Indexed by level; od_mv_dp_row_init() compares it against the difference in
-   vx between the current node and earlier nodes in the path.*/
-static const int OD_ROW_PRED_HIST_SIZE[5]={8,4,2,2,1};
-/*The amount of history to restore in the trellis state to ensure predicted MVs
-   are evaluated correctly in column refinement.*/
-static const int OD_COL_PRED_HIST_SIZE[5]={8,4,2,2,1};
-
-
-
-/*Returns the boundary case indicating which motion vector range edges the
-   current motion vector is abutting.
-  The allowed range is computed from the pixel position of the node
-   ((_vx-2)<<2,(_vy-2)<<2), the block size, and the frame dimensions, and is
-   scaled by 8 (<<3) before being compared against _dx and _dy.
-  _dsz is subtracted from the upper limits only.
-  _state:      The state to take the frame dimensions from
-                (info.frame_width and info.frame_height).
-  _vx:         The horizontal position of the node.
-  _vy:         The vertical position of the node.
-  _dx:         The horizontal component of the motion vector.
-  _dy:         The vertical component of the motion vector.
-  _dsz:        The amount the vector is being adjusted by.
-  _log_blk_sz: The log base 2 of the maximum size of a block the vector can
-                belong to.
-  Return: A set of flags indicating the boundary conditions, after the
-   documentation at OD_SQUARE_SITES.
-          Bit 0 is set when _dx<=mvxmin, bit 1 when _dx>=mvxmax, bit 2 when
-           _dy<=mvymin, and bit 3 when _dy>=mvymax.*/
-static int od_mv_est_get_boundary_case(od_state *_state,int _vx,int _vy,
- int _dx,int _dy,int _dsz,int _log_blk_sz){
-  int mvxmin;
-  int mvxmax;
-  int mvymin;
-  int mvymax;
-  int blk_sz;
-  int bx;
-  int by;
-  blk_sz=1<<_log_blk_sz;
-  bx=_vx-2<<2;
-  by=_vy-2<<2;
-  mvxmin=OD_MAXI(bx-blk_sz-32,-16)-(bx-blk_sz)<<3;
-  mvxmax=(OD_MINI(bx+blk_sz+32,_state->info.frame_width+16)-(bx+blk_sz)<<3)-
-   _dsz;
-  mvymin=OD_MAXI(by-blk_sz-32,-16)-(by-blk_sz)<<3;
-  mvymax=(OD_MINI(by+blk_sz+32,_state->info.frame_height+16)-(by+blk_sz)<<3)-
-   _dsz;
-  return (_dx<=mvxmin)|(_dx>=mvxmax)<<1|(_dy<=mvymin)<<2|(_dy>=mvymax)<<3;
-}
-
-/*Computes the SAD of the specified block.
-  This simply forwards the block's vx, vy, c, s, and log_mvb_sz fields to
-   od_mv_est_sad8(); the commented-out lines are debugging output.
-  _est:   The motion estimation context.
-  _ref:   Passed through to od_mv_est_sad8().
-  _block: The block to compute the SAD of.
-  Return: The value returned by od_mv_est_sad8().*/
-static ogg_int32_t od_mv_est_block_sad8(od_mv_est_ctx *_est,int _ref,
- od_mv_node *_block){
-  /*ogg_int32_t    ret;*/
-  /*fprintf(stderr,"Adding SAD (%3i,%3i) [%2ix%2i]: ",
-   _block->vx-2<<2,_block->vy-2<<2,
-   4<<_block->log_mvb_sz,4<<_block->log_mvb_sz);*/
-  return /*ret=*/od_mv_est_sad8(_est,_ref,_block->vx,_block->vy,
-   _block->c,_block->s,_block->log_mvb_sz);
-  /*fprintf(stderr,"%6i\n",ret);
-  return ret;*/
-}
-
-/*Gets the change in SAD for the blocks affected by the given DP node, using
-   the current state of the grid.
-  _est:        The motion estimation context.
-  _ref:        Passed through to od_mv_est_block_sad8().
-  _dp:         The DP node whose block list (blocks, nblocks) is evaluated.
-  _block_sads: Returns the newly computed SAD of each block, in the same
-                order as _dp->blocks.
-               Only the first _dp->nblocks entries are written.
-  Return: The sum, over all of the node's blocks, of the new SAD minus the
-           SAD currently stored in the block's sad field.*/
-static ogg_int32_t od_mv_dp_get_sad_change8(od_mv_est_ctx *_est,int _ref,
- od_mv_dp_node *_dp,ogg_int32_t _block_sads[8]){
-  int         bi;
-  ogg_int32_t dd;
-  dd=0;
-  for(bi=0;bi<_dp->nblocks;bi++){
-    od_mv_node *block;
-    block=_dp->blocks[bi];
-    _block_sads[bi]=od_mv_est_block_sad8(_est,_ref,block);
-    /*fprintf(stderr,"SAD change for block (%i,%i) [%ix%i]: %i-%i=%i\n",
-     block->vx,block->vy,1<<block->log_mvb_sz+2,1<<block->log_mvb_sz+2,
-     _block_sads[bi],block->sad,_block_sads[bi]-block->sad);*/
-    dd+=_block_sads[bi]-block->sad;
-  }
-  return dd;
-}
-
-/*Computes a rate adjustment for the predictors changed by following the given
-   trellis path.
-  As a side effect, enough of the trellis needed to evaluate that change is
-   loaded into the MV grid: starting at _dp->min_predictor_node (when it is
-   not NULL), the MV and MV rate of each earlier node on the path, and the
-   rates of the changeable MVs it predicts, are restored from the state
-   chosen along that path.
-  The MV of the state currently being examined for _dp must have already been
-   placed in the grid.
-  NOTE(review): pred_sis holds 8 entries and the npreds>8 check is commented
-   out; presumably min_predictor_node is never more than 8 nodes back -
-   confirm against the callers.
-  _state:         The state to take the grid dimensions and MV predictors
-                   from.
-  _dp:            The current DP node.
-  _cur_mv_rate:   Returns the new rate of the current node's MV.
-                  This is 0 when the node lies outside of
-                   [2,nhmvbs-2]x[2,nvmvbs-2].
-  _pred_mv_rates: Returns the new rate of each MV the current node predicts.
-                  Only the first _dp->npredicted entries are written, and
-                   none are when the node lies outside of the range above.
-  _prevsi:        The state index to follow in the previous DP node.
-  _mv_res:        The motion vector resolution (0=1/8th pel to 2=1/2 pel).
-  Return: The change in rate of the current MV and of the MVs it predicts,
-           relative to their stored mv_rate values.*/
-static int od_mv_dp_get_rate_change(od_state *_state,od_mv_dp_node *_dp,
- int *_cur_mv_rate,int _pred_mv_rates[17],int _prevsi,int _mv_res){
-  od_mv_node    *mv;
-  od_mv_grid_pt *mvg;
-  int            nhmvbs;
-  int            nvmvbs;
-  int            pred[2];
-  int            pi;
-  int            dr;
-  /*Move the state from the current trellis path into the grid.*/
-  if(_dp->min_predictor_node!=NULL){
-    int            pred_sis[8];
-    int            pred_si;
-    int            npreds;
-    od_mv_dp_node *pred_dp;
-    npreds=_dp-_dp->min_predictor_node;
-    /*if(npreds>8)fprintf(stderr,"Too far back!\n");*/
-    /*fprintf(stderr,"Restoring ");*/
-    /*First, follow the trellis path backwards to find the state used in each
-       node.
-      A prevsi at or above the previous node's nstates is reduced by nstates
-       to get the actual state index.*/
-    pred_si=pred_sis[npreds-1]=_prevsi;
-    for(pi=2;pi<=npreds;pi++){
-      pred_dp=_dp-pi;
-      pred_si=pred_dp[1].states[pred_si].prevsi;
-      if(pred_si>=pred_dp[0].nstates)pred_si-=pred_dp[0].nstates;
-      pred_sis[npreds-pi]=pred_si;
-    }
-    /*Then restore that state going FORWARDS.*/
-    for(pred_dp=_dp->min_predictor_node;pred_dp<_dp;pred_dp++){
-      pred_si=pred_sis[pred_dp-_dp->min_predictor_node];
-      /*Restore the state for this MV itself.*/
-      pred_dp->mv->mv_rate=pred_dp->states[pred_si].mv_rate;
-      mvg=pred_dp->mvg;
-      mvg->mv[0]=pred_dp->states[pred_si].mv[0];
-      mvg->mv[1]=pred_dp->states[pred_si].mv[1];
-      /*fprintf(stderr,"(%i,%i:%i)->(%i,%i) ",
-       pred_dp->mv->vx,pred_dp->mv->vy,pred_si,mvg->mv[0],mvg->mv[1]);*/
-      /*Restore the state for the MVs this one predicted.*/
-      for(pi=0;pi<pred_dp->npred_changeable;pi++){
-        pred_dp->predicted_mvs[pi]->mv_rate=
-         pred_dp->states[pred_si].pred_mv_rates[pi];
-      }
-    }
-    /*fprintf(stderr,"\n");*/
-  }
-  nhmvbs=_state->nhmbs+1<<2;
-  nvmvbs=_state->nvmbs+1<<2;
-  /*Compute the new rate for the current MV.*/
-  mv=_dp->mv;
-  if(mv->vx<2||mv->vx>nhmvbs-2||mv->vy<2||mv->vy>nvmvbs-2)*_cur_mv_rate=dr=0;
-  else{
-    od_state_get_predictor(_state,pred,mv->vx,mv->vy,
-     OD_MC_LEVEL[mv->vy&3][mv->vx&3],_mv_res);
-    mvg=_dp->mvg;
-    *_cur_mv_rate=od_mv_est_bits(
-     (mvg->mv[0]>>_mv_res)-pred[0],(mvg->mv[1]>>_mv_res)-pred[1]);
-    /*fprintf(stderr,"Current MV rate: %i-%i=%i\n",
-     *_cur_mv_rate,mv->mv_rate,*_cur_mv_rate-mv->mv_rate);*/
-    dr=*_cur_mv_rate-mv->mv_rate;
-    /*Compute the new rates for the MVs this one predicts.*/
-    /*fprintf(stderr,
-     "Calculating predicted pred_mv_rates for node (%i,%i):\n",
-     _dp->mv->vx,_dp->mv->vy);*/
-    for(pi=0;pi<_dp->npredicted;pi++){
-      mv=_dp->predicted_mvs[pi];
-      mvg=_dp->predicted_mvgs[pi];
-      od_state_get_predictor(_state,pred,mv->vx,mv->vy,
-       OD_MC_LEVEL[mv->vy&3][mv->vx&3],_mv_res);
-      _pred_mv_rates[pi]=od_mv_est_bits(
-       (mvg->mv[0]>>_mv_res)-pred[0],(mvg->mv[1]>>_mv_res)-pred[1]);
-      /*fprintf(stderr,"Calculated predicted mv_rate of %i for (%i,%i)\n",
-       _pred_mv_rates[pi],mv->vx,mv->vy);
-      fprintf(stderr,"Predictor was: (%i,%i)   MV was: (%i,%i)\n",
-       pred[0],pred[1],mvg->mv[0]>>_mv_res,mvg->mv[1]>>_mv_res);*/
-      /*fprintf(stderr,"Predicted MV (%i,%i) rate: %i-%i=%i\n",
-       mv->vx,mv->vy,_pred_mv_rates[pi],mv->mv_rate,
-       _pred_mv_rates[pi]-mv->mv_rate);*/
-      dr+=_pred_mv_rates[pi]-mv->mv_rate;
-    }
-  }
-  return dr;
-}
-
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-static const unsigned char OD_YCbCr_BEDGE[3]= { 41,240,110};
-static const unsigned char OD_YCbCr_VEDGE[3]= {145, 54, 34};
-static const unsigned char OD_YCbCr_VBEDGE[3]={170,166, 16};
-/*Dumps an animation frame showing the current state of the DP.
-  The frame is predicted and the visualization image is filled, then the
-   edges along the active trellis paths are drawn, the MVs are redrawn on top
-   of them, the candidate MVs of the active states are drawn, and the image
-   is dumped under a label formatted as "ani%08i" from _state->ani_iter,
-   which is then incremented.
-  An edge is drawn with OD_YCbCr_BEDGE when all of the active states leading
-   to it are below the node's nstates, with OD_YCbCr_VEDGE when all of them
-   are at or above it, and with OD_YCbCr_VBEDGE when there is a mix.
-  Both passes walk backwards from _dp, and stop after processing a node whose
-   states[0].prevsi is negative.
-  _state:   The state to predict from and draw into.
-  _ref:     Passed through to od_state_mc_predict().
-  _dp:      The most recent DP node in the path.
-  _has_gap: When non-zero, the step between _dp and the node before it is
-             treated differently: the block size used for the extra edge
-             lines is not halved, and the candidate MVs of _dp itself are not
-             drawn.*/
-static void od_mv_dp_animate_state(od_state *_state,int _ref,
- od_mv_dp_node *_dp,int _has_gap){
-  od_mv_dp_node *dp;
-  char           iter_label[16];
-  int            active_states[OD_DP_NSTATES_MAX<<1];
-  int            prev_active_states[OD_DP_NSTATES_MAX<<1];
-  int            nactive_states;
-  int            nprev_active_states;
-  int            state;
-  int            si;
-  int            x0;
-  int            y0;
-  od_state_mc_predict(_state,_ref);
-  od_state_fill_vis(_state);
-  /*Now, draw the current state of the DP.*/
-  /*First draw the candidate edge labels for the active trellis paths.
-    Every state of _dp starts out active, in both its plain (si) and offset
-     (si+nstates) forms.*/
-  for(si=0;si<_dp->nstates;si++){
-    prev_active_states[si<<1]=si;
-    prev_active_states[si<<1|1]=si+_dp->nstates;
-  }
-  nprev_active_states=_dp->nstates<<1;
-  nactive_states=0;
-  dp=_dp;
-  do{
-    int has_vedge;
-    int has_bedge;
-    if(nactive_states>0){
-      x0=(dp[0].mv->vx-2<<3)+(OD_UMV_PADDING<<1);
-      y0=(dp[0].mv->vy-2<<3)+(OD_UMV_PADDING<<1);
-      has_vedge=has_bedge=0;
-      for(si=0;si<nprev_active_states;si++){
-        if(prev_active_states[si]<dp[0].nstates)has_bedge=1;
-        else has_vedge=1;
-      }
-      if(has_vedge||has_bedge){
-        int mvb_sz;
-        int x1;
-        int y1;
-        x1=(dp[1].mv->vx-2<<3)+(OD_UMV_PADDING<<1);
-        y1=(dp[1].mv->vy-2<<3)+(OD_UMV_PADDING<<1);
-        od_img_draw_line(&_state->vis_img,x0,y0,x1,y1,
-         has_vedge?has_bedge?OD_YCbCr_VBEDGE:OD_YCbCr_VEDGE:OD_YCbCr_BEDGE);
-        if(dp[1].mv->vx-dp[0].mv->vx>1){
-          mvb_sz=dp[1].mv->vx-dp[0].mv->vx;
-          if(!_has_gap||dp+1!=_dp)mvb_sz>>=1;
-          if(!_state->mv_grid[dp[0].mv->vy][dp[0].mv->vx+mvb_sz].valid){
-            if(dp[0].mv->vy>=mvb_sz&&
-             _state->mv_grid[dp[0].mv->vy-mvb_sz][dp[0].mv->vx+mvb_sz].valid){
-              od_img_draw_line(&_state->vis_img,
-               x0+(mvb_sz<<3),y0-(mvb_sz<<3),x0+(mvb_sz<<3),y1,
-               has_vedge?has_bedge?OD_YCbCr_VBEDGE:
-               OD_YCbCr_VEDGE:OD_YCbCr_BEDGE);
-            }
-            if(dp[0].mv->vy<=(_state->nvmbs+1<<2)-mvb_sz&&
-             _state->mv_grid[dp[0].mv->vy+mvb_sz][dp[0].mv->vx+mvb_sz].valid){
-              od_img_draw_line(&_state->vis_img,
-               x0+(mvb_sz<<3),y0+(mvb_sz<<3),x0+(mvb_sz<<3),y1,
-               has_vedge?has_bedge?OD_YCbCr_VBEDGE:
-               OD_YCbCr_VEDGE:OD_YCbCr_BEDGE);
-            }
-          }
-        }
-        else if(dp[1].mv->vy-dp[0].mv->vy>1){
-          mvb_sz=dp[1].mv->vy-dp[0].mv->vy;
-          if(!_has_gap||dp+1!=_dp)mvb_sz>>=1;
-          if(!_state->mv_grid[dp[0].mv->vy+mvb_sz][dp[0].mv->vx].valid){
-            if(dp[0].mv->vx>=mvb_sz&&
-             _state->mv_grid[dp[0].mv->vy+mvb_sz][dp[0].mv->vx-mvb_sz].valid){
-              od_img_draw_line(&_state->vis_img,
-               x0-(mvb_sz<<3),y0+(mvb_sz<<3),x1,y0+(mvb_sz<<3),
-               has_vedge?has_bedge?OD_YCbCr_VBEDGE:
-               OD_YCbCr_VEDGE:OD_YCbCr_BEDGE);
-            }
-            if(dp[0].mv->vx<=(_state->nhmbs+1<<2)-mvb_sz&&
-             _state->mv_grid[dp[0].mv->vy+mvb_sz][dp[0].mv->vx+mvb_sz].valid){
-              od_img_draw_line(&_state->vis_img,
-               x0+(mvb_sz<<3),y0+(mvb_sz<<3),x1,y0+(mvb_sz<<3),
-               has_vedge?has_bedge?OD_YCbCr_VBEDGE:
-               OD_YCbCr_VEDGE:OD_YCbCr_BEDGE);
-            }
-          }
-        }
-      }
-    }
-    memcpy(active_states,prev_active_states,
-     sizeof(active_states[0])*nprev_active_states);
-    nactive_states=nprev_active_states;
-    /*Follow the chain backwards to find the new active states.
-      Each distinct prevsi value is recorded only once.*/
-    nprev_active_states=0;
-    for(si=0;si<nactive_states;si++){
-      int sj;
-      state=active_states[si];
-      if(state>=dp[0].nstates)state-=dp[0].nstates;
-      state=dp[0].states[state].prevsi;
-      for(sj=0;sj<nprev_active_states&&prev_active_states[sj]!=state;sj++);
-      if(sj>=nprev_active_states){
-        prev_active_states[nprev_active_states++]=state;
-      }
-    }
-  }
-  while((dp--)->states[0].prevsi>=0);
-  /*Now, draw all the candidate MVs in the active trellis paths.
-    These two steps used to be together; now they're apart.
-    Sorry for the mess that caused.*/
-  /*Redraw the MVs, so they appear over the edge labels above.*/
-  od_state_draw_mvs(_state);
-  for(si=0;si<_dp->nstates;si++){
-    prev_active_states[si<<1]=si;
-    prev_active_states[si<<1|1]=si+_dp->nstates;
-  }
-  nprev_active_states=_dp->nstates<<1;
-  nactive_states=0;
-  dp=_dp;
-  do{
-    if(nactive_states>0){
-      x0=(dp[0].mv->vx-2<<3)+(OD_UMV_PADDING<<1);
-      y0=(dp[0].mv->vy-2<<3)+(OD_UMV_PADDING<<1);
-      if(!_has_gap||dp+1!=_dp){
-        x0=(dp[1].mv->vx-2<<3)+(OD_UMV_PADDING<<1);
-        y0=(dp[1].mv->vy-2<<3)+(OD_UMV_PADDING<<1);
-        for(si=0;si<nactive_states;si++){
-          state=active_states[si];
-          if(state>=dp[1].nstates)state-=dp[1].nstates;
-          od_img_draw_line(&_state->vis_img,x0,y0,
-           x0+OD_DIV_ROUND_POW2(dp[1].states[state].mv[0],2,2),
-           y0+OD_DIV_ROUND_POW2(dp[1].states[state].mv[1],2,2),
-           OD_YCbCr_MVCAND);
-        }
-      }
-    }
-    memcpy(active_states,prev_active_states,
-     sizeof(active_states[0])*nprev_active_states);
-    nactive_states=nprev_active_states;
-    /*Follow the chain backwards to find the new active states.*/
-    nprev_active_states=0;
-    for(si=0;si<nactive_states;si++){
-      int sj;
-      state=active_states[si];
-      if(state>=dp[0].nstates)state-=dp[0].nstates;
-      state=dp[0].states[state].prevsi;
-      for(sj=0;sj<nprev_active_states&&prev_active_states[sj]!=state;sj++);
-      if(sj>=nprev_active_states){
-        prev_active_states[nprev_active_states++]=state;
-      }
-    }
-  }
-  while((dp--)->states[0].prevsi>=0);
-  /*Draw the first state's MV's.
-    After the loop above, dp points one node before the first node in the
-     path, so dp[1] is that first node.*/
-  x0=(dp[1].mv->vx-2<<3)+(OD_UMV_PADDING<<1);
-  y0=(dp[1].mv->vy-2<<3)+(OD_UMV_PADDING<<1);
-  for(si=0;si<nactive_states;si++){
-    state=active_states[si];
-    if(state>=dp[1].nstates)state-=dp[1].nstates;
-    od_img_draw_line(&_state->vis_img,x0,y0,
-     x0+OD_DIV_ROUND_POW2(dp[1].states[state].mv[0],2,2),
-     y0+OD_DIV_ROUND_POW2(dp[1].states[state].mv[1],2,2),
-     OD_YCbCr_MVCAND);
-  }
-  sprintf(iter_label,"ani%08i",_state->ani_iter++);
-  od_state_dump_img(_state,&_state->vis_img,iter_label);
-}
-#endif
-
-/*Row refinement.*/
-/*Initializes a DP node for row refinement.
-  The node's mv and mvg pointers are set, and the grid point's current MV,
-   its right field (as original_etype), and the node's MV rate are saved as
-   the originals.
-  For a node inside [2,nhmvbs-2]x[2,nvmvbs-2], the valid MVs it predicts are
-   collected from OD_ROW_PREDICTED (saving the current rates of the changeable
-   ones), and the earliest preceding DP node whose state must be restored is
-   recorded in min_predictor_node.
-  For any other node nothing is marked as predicted, and min_predictor_node
-   is simply set to _prev_dp.
-  _est:     The motion estimation context.
-  _dp:      The DP node to initialize.
-  _vx:      The horizontal position of the node.
-  _vy:      The vertical position of the node.
-  _prev_dp: The preceding node in the DP path; NULL is accepted.*/
-static void od_mv_dp_row_init(od_mv_est_ctx *_est,od_mv_dp_node *_dp,
- int _vx,int _vy,od_mv_dp_node *_prev_dp){
-  od_state      *state;
-  int            nhmvbs;
-  int            nvmvbs;
-  state=&_est->enc->state;
-  _dp->mv=_est->mvs[_vy]+_vx;
-  _dp->mvg=state->mv_grid[_vy]+_vx;
-  _dp->original_mv[0]=_dp->mvg->mv[0];
-  _dp->original_mv[1]=_dp->mvg->mv[1];
-  _dp->original_etype=_dp->mvg->right;
-  _dp->original_mv_rate=_dp->mv->mv_rate;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  if(_vx<2||_vx>nhmvbs-2||_vy<2||_vy>nvmvbs-2){
-    /*Strictly speaking, we may be used to predict others, but since our MV
-       can't possibly change, neither can their rate.*/
-    _dp->npredicted=_dp->npred_changeable=0;
-    /*No one else is used to predict us, or any other MV we predict.
-      However, we may still need to load the previous MV into the grid to
-       estimate our SADs properly.*/
-    _dp->min_predictor_node=_prev_dp;
-  }
-  else{
-    int level;
-    int pred_hist;
-    int npred;
-    int nchangeable;
-    int pi;
-    /*Get the list of MVs we help predict.
-      Candidates outside of [2,nhmvbs-2]x[2,nvmvbs-2] and candidates that are
-       not valid in the grid are skipped.*/
-    level=OD_MC_LEVEL[_vy&3][_vx&3];
-    /*fprintf(stderr,"Initializing node (%i,%i) [%i,%i] at level %i:\n",
-     _vx,_vy,_vx-2<<2,_vy-2<<2,level);*/
-    npred=nchangeable=0;
-    for(pi=0;pi<OD_NPREDICTED[level];pi++){
-      int px;
-      int py;
-      px=_vx+OD_ROW_PREDICTED[level][pi][0];
-      if(px<2||px>nhmvbs-2)continue;
-      py=_vy+OD_ROW_PREDICTED[level][pi][1];
-      if(py<2||py>nvmvbs-2)continue;
-      if(state->mv_grid[py][px].valid){
-        /*fprintf(stderr,"Adding (%i,%i) as a PREDICTED MV.\n",px,py);*/
-        _dp->predicted_mvgs[npred]=state->mv_grid[py]+px;
-        _dp->predicted_mvs[npred]=_est->mvs[py]+px;
-        if(pi<OD_NROW_PRED_CHANGEABLE[level]){
-          /*fprintf(stderr,"It is CHANGEABLE.\n");*/
-          _dp->original_mv_rates[npred]=_est->mvs[py][px].mv_rate;
-          nchangeable++;
-        }
-        npred++;
-      }
-    }
-    _dp->npredicted=npred;
-    _dp->npred_changeable=nchangeable;
-    /*Now, figure out the earliest DP node that influences our own prediction,
-       or that of one of the other MVs we predict.*/
-    pred_hist=OD_ROW_PRED_HIST_SIZE[level];
-    /*fprintf(stderr,"Marking history up to %i back: %i>=%i\n",
-     pred_hist,_prev_dp!=NULL?_prev_dp->mv->vx:-1,_vx-pred_hist);*/
-    if(_prev_dp!=NULL&&_prev_dp->mv->vx>=_vx-pred_hist){
-      od_mv_dp_node *dp_pred;
-      for(dp_pred=_prev_dp;dp_pred->mv->vx>_vx-pred_hist&&
-       dp_pred->states[0].prevsi>=0;dp_pred--);
-      /*fprintf(stderr,"Stopped at (%i,%i) (%i<=%i? %i) (%i<0? %i)\n",
-       dp_pred->mv->vx,dp_pred->mv->vy,dp_pred->mv->vx,_vx-pred_hist,
-       dp_pred->mv->vx<=_vx-pred_hist,
-       dp_pred->states[0].prevsi,dp_pred->states[0].prevsi<0);*/
-      if(dp_pred->mv->vx<_vx-pred_hist){dp_pred++;/*fprintf(stderr,"Too far, incrementing to (%i,%i).\n",dp_pred->mv->vx,dp_pred->mv->vy);*/}
-      _dp->min_predictor_node=dp_pred;
-      /*fprintf(stderr,"State will be restored back to (%i,%i).\n",
-       dp_pred->mv->vx,dp_pred->mv->vy);*/
-    }
-    else _dp->min_predictor_node=NULL;
-  }
-}
-/*Fills in the block list (_dp->blocks and _dp->nblocks) of a DP node with
-   block nodes at columns to the left of the vertex (_vx,_vy), both above and
-   at its row.
-  Judging by the name, this is meant for the first node of a row DP path;
-   the caller is not visible here, so confirm before relying on that.
-  No blocks are added when _vx<=2.
-  For vertices of level 3 or higher, the blocks at (_vx-mvb_sz,_vy-mvb_sz)
-   and (_vx-mvb_sz,_vy) are used, subject to the vertical range checks.
-  For lower levels, the valid flags of neighboring grid points determine
-   which offset (mvb_sz, half of it, or a quarter of it) is used and whether
-   additional adjacent blocks are added.
-  _est: The motion estimation context.
-  _dp:  The DP node whose block list is filled in.
-  _vx:  The horizontal position of the node.
-  _vy:  The vertical position of the node.*/
-static void od_mv_dp_first_row_block_setup(od_mv_est_ctx *_est,
- od_mv_dp_node *_dp,int _vx,int _vy){
-  od_state *state;
-  int       nvmvbs;
-  int       level;
-  int       log_mvb_sz;
-  int       mvb_sz;
-  int       nblocks;
-  state=&_est->enc->state;
-  nvmvbs=state->nvmbs+1<<2;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1;
-  mvb_sz=1<<log_mvb_sz;
-  nblocks=0;
-  if(_vx>2){
-    if(level>=3){
-      if(_vy>=mvb_sz)_dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-      if(_vy<=nvmvbs-mvb_sz)_dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_sz;
-    }
-    else{
-      int half_mvb_sz;
-      int mvb_off;
-      half_mvb_sz=mvb_sz>>1;
-      if(_vy>=mvb_sz){
-        if(state->mv_grid[_vy-half_mvb_sz][_vx-half_mvb_sz].valid){
-          if(level>0||
-           !state->mv_grid[_vy-(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-            mvb_off=half_mvb_sz;
-          }
-          else mvb_off=half_mvb_sz>>1;
-          _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx-mvb_off;
-          if(!state->mv_grid[_vy-mvb_off][_vx].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx-mvb_off;
-          }
-          if(!state->mv_grid[_vy][_vx-mvb_off].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx-(mvb_off<<1);
-          }
-        }
-        else _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-      }
-      if(_vy<=nvmvbs-mvb_sz){
-        if(state->mv_grid[_vy+half_mvb_sz][_vx-half_mvb_sz].valid){
-          if(level>0||
-           !state->mv_grid[_vy+(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-            mvb_off=half_mvb_sz;
-          }
-          else mvb_off=half_mvb_sz>>1;
-          _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_off;
-          if(!state->mv_grid[_vy+mvb_off][_vx].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_off]+_vx-mvb_off;
-          }
-          if(!state->mv_grid[_vy][_vx-mvb_off].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-(mvb_off<<1);
-          }
-        }
-        else _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_sz;
-      }
-    }
-  }
-  _dp->nblocks=nblocks;
-}
-/*Builds the block list for a row DP node that follows another node (it is
-   called on dp_node+1 by od_mv_est_refine_row()).
-  Pointers to entries of _est->mvs are appended to _dp->blocks and their count
-   is stored in _dp->nblocks; every entry chosen lies in a column to the left
-   of _vx.
-  Unlike od_mv_dp_first_row_block_setup(), there is no _vx>2 guard.
-  Extra entries are appended when the level at column _vx-mvb_sz gives a
-   larger log_mvb_sz than this node's (prev_log_mvb_sz>log_mvb_sz) and the
-   diagonal grid point one mvb_sz away is invalid, and, in the half/quarter
-   offset branches, when a second grid point further to the left is invalid
-   as well.
-  Note that + and - bind tighter than the shifts and & below: nvmvbs is
-   (state->nvmbs+1)<<2, log_mvb_sz is (4-level)>>1, and the prev_level column
-   index is (_vx-mvb_sz)&3.
-  _est: The motion estimation context; supplies mvs and the encoder state.
-  _dp:  The DP node whose blocks and nblocks fields are filled in.
-  _vx:  The horizontal MV grid index of the node.
-  _vy:  The vertical MV grid index of the node.*/
-static void od_mv_dp_prev_row_block_setup(od_mv_est_ctx *_est,
- od_mv_dp_node *_dp,int _vx,int _vy){
-  od_state *state;
-  int       nvmvbs;
-  int       level;
-  int       prev_level;
-  int       log_mvb_sz;
-  int       prev_log_mvb_sz;
-  int       mvb_sz;
-  int       nblocks;
-  state=&_est->enc->state;
-  nvmvbs=state->nvmbs+1<<2;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1;
-  mvb_sz=1<<log_mvb_sz;
-  prev_level=OD_MC_LEVEL[_vy&3][_vx-mvb_sz&3];
-  prev_log_mvb_sz=4-prev_level>>1;
-  nblocks=0;
-  if(level>=3){
-    if(_vy>=mvb_sz){
-      _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-      if(prev_log_mvb_sz>log_mvb_sz&&
-       !state->mv_grid[_vy-mvb_sz][_vx-mvb_sz].valid){
-        _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_sz<<1)]+_vx-mvb_sz;
-      }
-    }
-    if(_vy<=nvmvbs-mvb_sz){
-      _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_sz;
-      if(prev_log_mvb_sz>log_mvb_sz&&
-       !state->mv_grid[_vy+mvb_sz][_vx-mvb_sz].valid){
-        _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_sz]+_vx-mvb_sz;
-      }
-    }
-  }
-  else{
-    int half_mvb_sz;
-    int mvb_off;
-    half_mvb_sz=mvb_sz>>1;
-    if(_vy>=mvb_sz){
-      if(state->mv_grid[_vy-half_mvb_sz][_vx-half_mvb_sz].valid){
-        if(level>0||
-         !state->mv_grid[_vy-(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-          mvb_off=half_mvb_sz;
-        }
-        else mvb_off=half_mvb_sz>>1;
-        _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx-mvb_off;
-        if(!state->mv_grid[_vy-mvb_off][_vx].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx-mvb_off;
-        }
-        if(!state->mv_grid[_vy][_vx-mvb_off].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx-(mvb_off<<1);
-          if(!state->mv_grid[_vy-mvb_off][_vx-(mvb_off<<1)].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx-(mvb_off<<1);
-          }
-        }
-      }
-      else{
-        _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-        if(prev_log_mvb_sz>log_mvb_sz&&
-         !state->mv_grid[_vy-mvb_sz][_vx-mvb_sz].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_sz<<1)]+_vx-mvb_sz;
-        }
-      }
-    }
-    if(_vy<=nvmvbs-mvb_sz){
-      if(state->mv_grid[_vy+half_mvb_sz][_vx-half_mvb_sz].valid){
-        if(level>0||
-         !state->mv_grid[_vy+(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-          mvb_off=half_mvb_sz;
-        }
-        else mvb_off=half_mvb_sz>>1;
-        _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_off;
-        if(!state->mv_grid[_vy+mvb_off][_vx].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_off]+_vx-mvb_off;
-        }
-        if(!state->mv_grid[_vy][_vx-mvb_off].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-(mvb_off<<1);
-          if(!state->mv_grid[_vy+mvb_off][_vx-(mvb_off<<1)].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_off]+_vx-(mvb_off<<1);
-          }
-        }
-      }
-      else{
-        _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_sz;
-        if(prev_log_mvb_sz>log_mvb_sz&&
-         !state->mv_grid[_vy+mvb_sz][_vx-mvb_sz].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_sz]+_vx-mvb_sz;
-        }
-      }
-    }
-  }
-  _dp->nblocks=nblocks;
-}
-/*Builds the block list for the extra node that follows the last real node of
-   a row DP chain (od_mv_est_refine_row() calls it on dp_node+1, passing the
-   vx of the last real node).
-  Pointers to entries of _est->mvs are appended to _dp->blocks and their count
-   is stored in _dp->nblocks; every entry chosen lies in column _vx or to its
-   right.
-  At level>=3 the entries in column _vx are used directly; at lower levels the
-   valid flags in state->mv_grid select a full, half, or quarter mvb_sz offset
-   and add further entries across invalid grid points.
-  Note that + and - bind tighter than the shifts below: nvmvbs is
-   (state->nvmbs+1)<<2 and log_mvb_sz is (4-level)>>1.
-  _est: The motion estimation context; supplies mvs and the encoder state.
-  _dp:  The DP node whose blocks and nblocks fields are filled in.
-  _vx:  The horizontal MV grid index of the last real node.
-  _vy:  The vertical MV grid index of the row.*/
-static void od_mv_dp_last_row_block_setup(od_mv_est_ctx *_est,
- od_mv_dp_node *_dp,int _vx,int _vy){
-  od_state *state;
-  int       nvmvbs;
-  int       level;
-  int       log_mvb_sz;
-  int       mvb_sz;
-  int       nblocks;
-  state=&_est->enc->state;
-  nvmvbs=state->nvmbs+1<<2;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1;
-  mvb_sz=1<<log_mvb_sz;
-  nblocks=0;
-  if(level>=3){
-    if(_vy>=mvb_sz)_dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx;
-    if(_vy<=nvmvbs-mvb_sz)_dp->blocks[nblocks++]=_est->mvs[_vy]+_vx;
-  }
-  else{
-    int half_mvb_sz;
-    int mvb_off;
-    half_mvb_sz=mvb_sz>>1;
-    if(_vy>=mvb_sz){
-      if(state->mv_grid[_vy-half_mvb_sz][_vx+half_mvb_sz].valid){
-        if(level>0||
-         !state->mv_grid[_vy-(half_mvb_sz>>1)][_vx+(half_mvb_sz>>1)].valid){
-          mvb_off=half_mvb_sz;
-        }
-        else mvb_off=half_mvb_sz>>1;
-        _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx;
-        if(!state->mv_grid[_vy][_vx+mvb_off].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx+mvb_off;
-        }
-        if(!state->mv_grid[_vy-mvb_off][_vx].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx;
-        }
-      }
-      else _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx;
-    }
-    if(_vy<=nvmvbs-mvb_sz){
-      if(state->mv_grid[_vy+half_mvb_sz][_vx+half_mvb_sz].valid){
-        if(level>0||
-         !state->mv_grid[_vy+(half_mvb_sz>>1)][_vx+(half_mvb_sz>>1)].valid){
-          mvb_off=half_mvb_sz;
-        }
-        else mvb_off=half_mvb_sz>>1;
-        _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx;
-        if(!state->mv_grid[_vy][_vx+mvb_off].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx+mvb_off;
-        }
-        if(!state->mv_grid[_vy+mvb_off][_vx].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_off]+_vx;
-        }
-      }
-      else _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx;
-    }
-  }
-  _dp->nblocks=nblocks;
-}
-
-/*Undoes the changes made along a row DP chain.
-  Starting at _dp and stepping backwards one node at a time, each node's MV,
-   MV rate, and right edge label are reset from the node's original_* fields,
-   as are the rates of the changeable MVs it predicts.
-  The walk ends after the node whose first state has no predecessor
-   (prevsi<0).
-  _dp: The last node of the chain to restore.*/
-static void od_mv_dp_restore_row_state(od_mv_dp_node *_dp){
-  for(;;_dp--){
-    od_mv_grid_pt *grid_pt;
-    int            i;
-    /*Put back this node's own MV state first.*/
-    _dp->mv->mv_rate=_dp->original_mv_rate;
-    grid_pt=_dp->mvg;
-    grid_pt->mv[0]=_dp->original_mv[0];
-    grid_pt->mv[1]=_dp->original_mv[1];
-    grid_pt->right=_dp->original_etype;
-    /*Then the rates of the MVs it helped to predict.*/
-    for(i=0;i<_dp->npred_changeable;i++){
-      _dp->predicted_mvs[i]->mv_rate=_dp->original_mv_rates[i];
-    }
-    /*The first node of the chain has no predecessor state: stop there.*/
-    if(_dp->states[0].prevsi<0)break;
-  }
-}
-
-/*Writes the chosen path through a row DP chain back into the MV grid.
-  The rates of predicted MVs may have been updated several times along the
-   trellis, so they have to be installed walking FORWARDS.
-  A first, backwards pass therefore flips each prevsi link on the path into a
-   link to the following state, setting the right edge labels on the way
-   (a state index >= nstates selects label 1, otherwise label 0).
-  A second, forwards pass then installs MVs, MV rates, block SADs, and the
-   rates of the predicted MVs.
-  _dp:     One past the last node on the path.
-  _prevsi: The state index chosen in the node before _dp (plus that node's
-            nstates if its right edge label is 1), or a negative value to
-            install nothing.*/
-static void od_mv_dp_install_row_state(od_mv_dp_node *_dp,int _prevsi){
-  od_mv_dp_node *node;
-  int            fwd_si;
-  int            cur_si;
-  node=_dp;
-  fwd_si=-1;
-  cur_si=_prevsi;
-  /*Backwards pass: set the edge labels and reverse the links.*/
-  while(cur_si>=0){
-    int back_si;
-    int is_v;
-    node--;
-    is_v=cur_si>=node->nstates;
-    node->mvg->right=is_v;
-    if(is_v)cur_si-=node->nstates;
-    back_si=node->states[cur_si].prevsi;
-    node->states[cur_si].prevsi=fwd_si;
-    fwd_si=cur_si;
-    cur_si=back_si;
-  }
-  /*Forwards pass: install everything else along the reversed links.*/
-  cur_si=fwd_si;
-  while(node<_dp){
-    od_mv_grid_pt *grid_pt;
-    int            i;
-    /*This node's own MV and rate.*/
-    node->mv->mv_rate=node->states[cur_si].mv_rate;
-    grid_pt=node->mvg;
-    grid_pt->mv[0]=node->states[cur_si].mv[0];
-    grid_pt->mv[1]=node->states[cur_si].mv[1];
-    /*The SADs of the blocks attached to this node.*/
-    for(i=0;i<node->nblocks;i++){
-      node->blocks[i]->sad=node->states[cur_si].block_sads[i];
-    }
-    /*The rates of the MVs this node predicts.*/
-    for(i=0;i<node->npredicted;i++){
-      node->predicted_mvs[i]->mv_rate=node->states[cur_si].pred_mv_rates[i];
-    }
-    cur_si=node->states[cur_si].prevsi;
-    node++;
-  }
-}
-/*Refines the MVs and right edge labels along one row of the MV grid using
-   dynamic programming over a trellis of candidate MVs.
-  The row is processed as one or more chains of DP nodes stored in
-   _est->dp_nodes, each chain starting at the next valid grid point.
-  The states of a node are the point's current MV plus, for points that pass
-   the border checks, the sites listed in _pattern for the point's boundary
-   case, offset by OD_SQUARE_DX/OD_SQUARE_DY shifted left by _log_dsz.
-  For every state after the first node, the cheapest predecessor state and
-   edge label are kept (B: right=0, V: right=1, as enabled by OD_MC_USEB and
-   OD_MC_USEV in _est->flags), with cost=dr*_est->lambda+(dd<<OD_LAMBDA_SCALE).
-  If the best complete path of a chain has a positive cost, a message is
-   printed to stderr and the original state is restored; otherwise the path is
-   installed and its cost is added to the return value.
-  od_mv_est_check_rd_state() is called once all chains are done.
-  _est:            The motion estimation context.
-  _ref:            Passed through to od_mv_dp_get_sad_change8() (presumably
-                    the index of the reference frame; confirm against callers).
-  _vy:             The vertical index of the MV grid row to refine.
-  _log_dsz:        The left shift applied to the pattern offsets.
-  _mv_res:         Passed through to od_mv_dp_get_rate_change() and
-                    od_mv_est_check_rd_state().
-  _pattern_nsites: The number of candidate sites for each boundary case.
-  _pattern:        The candidate site indices (into OD_SQUARE_DX and
-                    OD_SQUARE_DY) for each boundary case.
-  Return: The sum of the costs of the paths that were installed (each of which
-           is at most zero).*/
-static ogg_int32_t od_mv_est_refine_row(od_mv_est_ctx *_est,int _ref,int _vy,
- int _log_dsz,int _mv_res,const int *_pattern_nsites,
- const od_pattern *_pattern){
-  od_state       *state;
-  od_mv_grid_pt  *grid;
-  od_mv_grid_pt  *pmvg;
-  od_mv_grid_pt  *mvg;
-  od_mv_dp_node  *dp_node;
-  od_mv_dp_state *cstate;
-  od_mv_dp_state *pstate;
-  ogg_int32_t     dcost;
-  int             nhmvbs;
-  int             nvmvbs;
-  int             mv_res;
-  int             level;
-  int             log_mvb_sz;
-  int             mvb_sz;
-  int             labels_only;
-  int             nsites;
-  int             sitei;
-  int             site;
-  int             curx;
-  int             cury;
-  int             vx;
-  int             b;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  labels_only=_vy<2||_vy>nvmvbs-2;
-  mv_res=OD_MINI(_log_dsz,2);/*NOTE(review): this local is never read; the
-                               calls below pass _mv_res instead.*/
-  grid=state->mv_grid[_vy];
-  dcost=0;
-  /*fprintf(stderr,"Refining row %i (%i)...\n",_vy,_vy-2<<2);*/
-  for(vx=0;;vx++){
-    ogg_int32_t block_sads[18][8];/*Row si for a B edge, si+nstates for V.*/
-    ogg_int32_t best_cost;
-    ogg_int32_t cost;
-    ogg_int32_t best_dd;
-    ogg_int32_t dd;
-    int         cur_mv_rates[9];
-    int         pred_mv_rates[9][17];
-    int         best_dr;
-    int         dr;
-    int         best_si;
-    int         si;
-    int         has_gap;
-    for(;vx<=nhmvbs&&!grid[vx].valid;vx++);
-    if(vx>nhmvbs)break;
-    level=OD_MC_LEVEL[_vy&3][vx&3];
-    /*fprintf(stderr,"Starting DP at vertex %i (%i), level %i\n",
-     vx,vx-2<<2,level);*/
-    log_mvb_sz=4-level>>1;
-    mvb_sz=1<<log_mvb_sz;
-    mvg=grid+vx;
-    curx=mvg->mv[0];
-    cury=mvg->mv[1];
-    dp_node=_est->dp_nodes;
-    od_mv_dp_row_init(_est,dp_node,vx,_vy,NULL);
-    od_mv_dp_first_row_block_setup(_est,dp_node,vx,_vy);
-    /*fprintf(stderr,"TESTING block SADs:\n");*/
-    od_mv_dp_get_sad_change8(_est,_ref,dp_node,block_sads[0]);
-    /*Compute the set of states for the first node.*/
-    if(vx>=2&&!labels_only){
-      b=od_mv_est_get_boundary_case(state,vx,_vy,curx,cury,
-       1<<_log_dsz,log_mvb_sz+2);
-      nsites=_pattern_nsites[b];
-    }
-    else b=nsites=0;
-    for(sitei=0,site=4;;sitei++){
-      cstate=dp_node[0].states+sitei;
-      cstate->mv[0]=curx+(OD_SQUARE_DX[site]<<_log_dsz);
-      cstate->mv[1]=cury+(OD_SQUARE_DY[site]<<_log_dsz);
-      cstate->prevsi=-1;
-      mvg->mv[0]=cstate->mv[0];
-      mvg->mv[1]=cstate->mv[1];
-      cstate->dr=od_mv_dp_get_rate_change(state,dp_node,
-       &cstate->mv_rate,cstate->pred_mv_rates,-1,_mv_res);
-      cstate->dd=od_mv_dp_get_sad_change8(_est,_ref,dp_node,
-       cstate->block_sads);
-      /*fprintf(stderr,"State: %i (%g,%g)  dr: %i  dd: %i  dopt: %i\n",
-       sitei,0.125*cstate->mv[0],0.125*cstate->mv[1],cstate->dr,cstate->dd,
-       cstate->dr*_est->lambda+(cstate->dd<<OD_LAMBDA_SCALE));*/
-      if(sitei>=nsites)break;
-      site=_pattern[b][sitei];
-    }
-    dp_node[0].nstates=nsites+1;
-    has_gap=0;
-    pmvg=mvg;
-    for(;vx<nhmvbs;){
-      /*Find the next available MV to advance to.*/
-      if(level&1){
-        if(!grid[vx+mvb_sz].valid){
-          /*fprintf(stderr,"Gap found at %i (%i), stopping\n",vx,vx-2<<2);*/
-          has_gap=1;
-          break;
-        }
-        else if(level>=3)vx++;
-        else if(!grid[vx+1].valid)vx+=mvb_sz;
-        else vx++;
-      }
-      else if(level>=4)vx++;
-      else if(!grid[vx+(mvb_sz>>1)].valid)vx+=mvb_sz;
-      else if(level>=2||!grid[vx+1].valid)vx+=mvb_sz>>1;
-      else vx++;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-      if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-        od_mv_dp_restore_row_state(dp_node);
-        od_mv_dp_animate_state(state,_ref,dp_node,0);
-      }
-#endif
-      level=OD_MC_LEVEL[_vy&3][vx&3];
-      /*fprintf(stderr,"Continuing DP at vertex %i (%i), level %i\n",
-       vx,vx-2<<2,level);*/
-      log_mvb_sz=4-level>>1;
-      mvb_sz=1<<log_mvb_sz;
-      mvg=grid+vx;
-      curx=mvg->mv[0];
-      cury=mvg->mv[1];
-      od_mv_dp_row_init(_est,dp_node+1,vx,_vy,dp_node);
-      od_mv_dp_prev_row_block_setup(_est,dp_node+1,vx,_vy);
-      /*fprintf(stderr,"TESTING block SADs:\n");
-      pmvg->mv[0]=dp_node[0].original_mv[0];
-      pmvg->mv[0]=dp_node[0].original_mv[0];
-      od_mv_dp_get_sad_change8(_est,_ref,dp_node+1,block_sads[0]);*/
-      /*Compute the set of states for this node.*/
-      if(vx<=nhmvbs-2&&!labels_only){
-        b=od_mv_est_get_boundary_case(state,vx,_vy,curx,cury,
-         1<<_log_dsz,log_mvb_sz+2);
-        nsites=_pattern_nsites[b];
-      }
-      else nsites=0;
-      for(sitei=0,site=4;;sitei++){
-        cstate=dp_node[1].states+sitei;
-        cstate->mv[0]=curx+(OD_SQUARE_DX[site]<<_log_dsz);
-        cstate->mv[1]=cury+(OD_SQUARE_DY[site]<<_log_dsz);
-        best_si=pmvg->right?dp_node[0].nstates:0;
-        best_dr=dp_node[0].states[0].dr;
-        best_dd=dp_node[0].states[0].dd;
-        best_cost=INT_MAX;
-        mvg->mv[0]=cstate->mv[0];
-        mvg->mv[1]=cstate->mv[1];
-        for(si=0;si<dp_node[0].nstates;si++){
-          pstate=dp_node[0].states+si;
-          /*Get the rate change for this state using previous state si.
-            This automatically loads the required bits of the trellis path into
-             the grid, like the previous MV.*/
-          cstate->dr=od_mv_dp_get_rate_change(state,dp_node+1,
-           cur_mv_rates+si,pred_mv_rates[si],si,_mv_res);
-          /*Test against the previous state with a B edge.*/
-          if(_est->flags&OD_MC_USEB){
-            pmvg->right=0;
-            dr=pstate->dr+cstate->dr;
-            /*Account for label mismatch.*/
-            /*if(pstate->prevsi>=0&&pstate->prevsi>=(dp_node-1)->nstates)dr+=2;*/
-            dd=pstate->dd+od_mv_dp_get_sad_change8(_est,_ref,dp_node+1,
-             block_sads[si]);
-            cost=dr*_est->lambda+(dd<<OD_LAMBDA_SCALE);
-            /*fprintf(stderr,
-             "State: %2i (%7g,%7g) P.State: %iB  dr: %3i  dd: %6i  dopt: %7i\n",
-             sitei,0.125*cstate->mv[0],0.125*cstate->mv[1],si,dr,dd,cost);*/
-            if(cost<best_cost){
-              best_si=si;
-              best_cost=cost;
-              best_dd=dd;
-              best_dr=dr;
-            }
-          }
-          if(_est->flags&OD_MC_USEV){
-            /*Test against the previous state with a V edge.*/
-            pmvg->right=1;
-            dr=pstate->dr+cstate->dr;
-            /*Account for label mismatch.*/
-            /*if(pstate->prevsi>=0&&pstate->prevsi<(dp_node-1)->nstates)rate+=2;*/
-            dd=pstate->dd+od_mv_dp_get_sad_change8(_est,_ref,dp_node+1,
-             block_sads[si+dp_node[0].nstates]);
-            cost=dr*_est->lambda+(dd<<OD_LAMBDA_SCALE);
-            /*fprintf(stderr,
-             "State: %2i (%7g,%7g) P.State: %iV  dr: %3i  dd: %6i  dopt: %7i\n",
-             sitei,0.125*cstate->mv[0],0.125*cstate->mv[1],si,dr,dd,cost);*/
-            if(cost<best_cost){
-              best_si=si+dp_node[0].nstates;
-              best_cost=cost;
-              best_dd=dd;
-              best_dr=dr;
-            }
-          }
-        }
-        /*fprintf(stderr,"State: %2i  Best P.State: %i%c\n",
-         sitei,best_si%dp_node[0].nstates,best_si>dp_node[0].nstates?'V':'B');*/
-        cstate->prevsi=best_si;
-        cstate->dr=best_dr;
-        cstate->dd=best_dd;
-        memcpy(cstate->block_sads,block_sads[best_si],
-         sizeof(block_sads[0][0])*dp_node[1].nblocks);
-        if(best_si<dp_node[0].nstates){
-          cstate->mv_rate=cur_mv_rates[best_si];
-          memcpy(cstate->pred_mv_rates,pred_mv_rates[best_si],
-           sizeof(pred_mv_rates[0][0])*dp_node[1].npredicted);
-        }
-        else{
-          cstate->mv_rate=cur_mv_rates[best_si-dp_node[0].nstates];
-          memcpy(cstate->pred_mv_rates,
-           pred_mv_rates[best_si-dp_node[0].nstates],
-           sizeof(pred_mv_rates[0][0])*dp_node[1].npredicted);
-        }
-        if(sitei>=nsites)break;
-        site=_pattern[b][sitei];
-      }
-      dp_node[1].nstates=nsites+1;
-      dp_node++;
-      pmvg=mvg;
-    }
-    /*fprintf(stderr,"Finished DP at vertex %i (%i)\n",
-     dp_node[0].mv->vx,dp_node[0].mv->vx-2<<2);*/
-    best_si=pmvg->right?dp_node[0].nstates:0;
-    best_cost=INT_MAX;
-    /*TODO: Once we stop optimizing at arbitrary places, we'll need to
-       compute the rate change of MVs we didn't get to.*/
-    dp_node[1].npredicted=dp_node[1].npred_changeable=0;
-    if(dp_node[0].mv->vx<nhmvbs-2){
-      od_mv_dp_last_row_block_setup(_est,dp_node+1,dp_node[0].mv->vx,_vy);
-      /*fprintf(stderr,"TESTING block SADs:\n");
-      pmvg->mv[0]=dp_node[0].original_mv[0];
-      pmvg->mv[1]=dp_node[0].original_mv[1];
-      od_mv_dp_get_sad_change8(_est,_ref,dp_node+1,block_sads[0]);*/
-      for(si=0;si<dp_node[0].nstates;si++){
-        pstate=dp_node[0].states+si;
-        pmvg->mv[0]=pstate->mv[0];
-        pmvg->mv[1]=pstate->mv[1];
-        /*Test against the state with a following B edge.*/
-        if(_est->flags&OD_MC_USEB){
-          pmvg->right=0;
-          dr=pstate->dr;
-          /*Account for label mismatch.*/
-          /*if(!has_gap&&pstate->prevsi>=0&&pstate->prevsi>=(dp_node-1)->nstates){
-            rate+=2;
-          }*/
-          dd=pstate->dd+od_mv_dp_get_sad_change8(_est,_ref,dp_node+1,
-           block_sads[si]);
-          cost=dr*_est->lambda+(dd<<OD_LAMBDA_SCALE);
-          /*fprintf(stderr,"State: --  P.State: %iB  dr: %3i  dd: %6i  dopt: %7i\n",
-           si,dr,dd,cost);*/
-          if(cost<best_cost){
-            best_si=si;
-            best_cost=cost;
-          }
-        }
-        /*Test against the state with a following V edge.
-          If we hit a gap, then the edge label does not actually affect
-           anything, so we can skip these tests if we did the ones above.*/
-        if((_est->flags&OD_MC_USEV)&&(!has_gap||!(_est->flags&OD_MC_USEB))){
-          pmvg->right=1;
-          dr=pstate->dr;
-          /*Account for label mismatch.*/
-          /*if(pstate->prevsi>=0&&pstate->prevsi<(dp_node-1)->nstates)rate+=2;*/
-          dd=pstate->dd+od_mv_dp_get_sad_change8(_est,_ref,dp_node+1,
-           block_sads[si+dp_node[0].nstates]);
-          cost=dr*_est->lambda+(dd<<OD_LAMBDA_SCALE);
-          /*fprintf(stderr,"State: --  P.State: %iV  dr: %3i  dd: %6i  dopt: %7i\n",
-           si,dr,dd,cost);*/
-          if(cost<best_cost){
-            best_si=si+dp_node[0].nstates;
-            best_cost=cost;
-          }
-        }
-      }
-    }
-    /*There are no blocks to accumulate SAD after this one, so pick the best
-       state so far.*/
-    else{
-      dp_node[1].nblocks=0;
-      for(si=0;si<dp_node[0].nstates;si++){
-        pstate=dp_node[0].states+si;
-        cost=pstate->dr*_est->lambda+(pstate->dd<<OD_LAMBDA_SCALE);
-        if(cost<best_cost){
-          best_si=si;
-          best_cost=cost;
-        }
-      }
-      if(pmvg->right)best_si+=dp_node[0].nstates;
-    }
-    /*fprintf(stderr,"Best P.State: %i%c dopt: %7i\n",
-     best_si%dp_node[0].nstates,best_si>dp_node[0].nstates?'V':'B',best_cost);*/
-    if(best_cost>0){
-      /*Our optimal path is worse than what we started with!
-        Restore the original state and give up.*/
-      fprintf(stderr,"Best cost (%7i) > 0! Optimization failed.\n",best_cost);
-      od_mv_dp_restore_row_state(dp_node);
-    }
-    else{
-      int bi;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-      if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-        char iter_label[16];
-        od_mv_dp_restore_row_state(dp_node);
-        od_mv_dp_animate_state(state,_ref,dp_node,0);
-        od_mv_dp_install_row_state(dp_node+1,best_si);
-        od_state_mc_predict(state,_ref);
-        od_state_fill_vis(state);
-        sprintf(iter_label,"ani%08i",state->ani_iter++);
-        od_state_dump_img(state,&state->vis_img,iter_label);
-      }
-#endif
-      /*Update the state along the optimal path.*/
-      od_mv_dp_install_row_state(dp_node+1,best_si);
-      /*Store the SADs from this last node, too.*/
-      for(bi=0;bi<dp_node[1].nblocks;bi++){
-        dp_node[1].blocks[bi]->sad=block_sads[best_si][bi];
-      }
-      dcost+=best_cost;
-    }
-  }
-  od_mv_est_check_rd_state(_est,_ref,_mv_res);
-  return dcost;
-}
-
-/*Column refinement.*/
-/*Initializes a DP node for column refinement at grid point (_vx,_vy).
-  The point's current MV, down edge label, and MV rate are saved in the node's
-   original_* fields.
-  If either index is below 2 or above the corresponding grid size minus 2, the
-   node gets no predicted MVs and _prev_dp is used as min_predictor_node.
-  Otherwise the valid, in-range MVs listed in OD_COL_PREDICTED for the point's
-   level are collected (saving the current rate of those among the first
-   OD_NCOL_PRED_CHANGEABLE[level] table entries), and min_predictor_node is
-   found by walking back from _prev_dp until pred_hist rows above _vy or the
-   start of the chain is reached; it is NULL when there is no previous node
-   or the previous node is more than pred_hist rows away.
-  NOTE(review): pred_hist is read from OD_ROW_PRED_HIST_SIZE even though this
-   is the column variant; confirm that the row table is intended here.
-  _est:     The motion estimation context.
-  _dp:      The node to initialize.
-  _vx:      The horizontal MV grid index of the node.
-  _vy:      The vertical MV grid index of the node.
-  _prev_dp: The previous node in the chain, or NULL if there is none.*/
-static void od_mv_dp_col_init(od_mv_est_ctx *_est,od_mv_dp_node *_dp,
- int _vx,int _vy,od_mv_dp_node *_prev_dp){
-  od_state      *state;
-  int            nhmvbs;
-  int            nvmvbs;
-  state=&_est->enc->state;
-  _dp->mv=_est->mvs[_vy]+_vx;
-  _dp->mvg=state->mv_grid[_vy]+_vx;
-  _dp->original_mv[0]=_dp->mvg->mv[0];
-  _dp->original_mv[1]=_dp->mvg->mv[1];
-  _dp->original_etype=_dp->mvg->down;
-  _dp->original_mv_rate=_dp->mv->mv_rate;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  if(_vx<2||_vx>nhmvbs-2||_vy<2||_vy>nvmvbs-2){
-    /*Strictly speaking, we may be used to predict others, but since our MV
-       can't possibly change, neither can their rate.*/
-    _dp->npredicted=_dp->npred_changeable=0;
-    /*No one else is used to predict us, or any other MV we predict.
-      However, we may still need to load the previous MV into the grid to
-       estimate our SADs properly.*/
-    _dp->min_predictor_node=_prev_dp;
-  }
-  else{
-    int level;
-    int pred_hist;
-    int npred;
-    int nchangeable;
-    int pi;
-    /*Get the list of MVs we help predict.*/
-    level=OD_MC_LEVEL[_vy&3][_vx&3];
-    /*fprintf(stderr,"Initializing node (%i,%i) [%i,%i] at level %i:\n",
-     _vx,_vy,_vx-2<<2,_vy-2<<2,level);*/
-    npred=nchangeable=0;
-    for(pi=0;pi<OD_NPREDICTED[level];pi++){
-      int px;
-      int py;
-      px=_vx+OD_COL_PREDICTED[level][pi][0];
-      if(px<2||px>nhmvbs-2)continue;
-      py=_vy+OD_COL_PREDICTED[level][pi][1];
-      if(py<2||py>nvmvbs-2)continue;
-      if(state->mv_grid[py][px].valid){
-        /*fprintf(stderr,"Adding (%i,%i) as a PREDICTED MV.\n",px,py);*/
-        _dp->predicted_mvgs[npred]=state->mv_grid[py]+px;
-        _dp->predicted_mvs[npred]=_est->mvs[py]+px;
-        if(pi<OD_NCOL_PRED_CHANGEABLE[level]){
-          /*fprintf(stderr,"It is CHANGEABLE.\n");*/
-          _dp->original_mv_rates[npred]=_est->mvs[py][px].mv_rate;
-          nchangeable++;
-        }
-        npred++;
-      }
-    }
-    _dp->npredicted=npred;
-    _dp->npred_changeable=nchangeable;
-    /*Now, figure out the earliest DP node that influences our own prediction,
-       or that of one of the other MVs we predict.*/
-    pred_hist=OD_ROW_PRED_HIST_SIZE[level];
-    if(_prev_dp!=NULL&&_prev_dp->mv->vy>=_vy-pred_hist){
-      od_mv_dp_node *dp_pred;
-      for(dp_pred=_prev_dp;dp_pred->mv->vy>_vy-pred_hist&&
-       dp_pred->states[0].prevsi>=0;dp_pred--);
-      if(dp_pred->mv->vy<_vy-pred_hist)dp_pred++;
-      _dp->min_predictor_node=dp_pred;
-      /*fprintf(stderr,"State will be restored back to (%i,%i).\n",
-       dp_pred->mv->vx,dp_pred->mv->vy);*/
-    }
-    else _dp->min_predictor_node=NULL;
-  }
-}
-/*Builds the block list for a column DP node; this is the column counterpart
-   of od_mv_dp_first_row_block_setup(), with the roles of _vx and _vy swapped.
-  Pointers to entries of _est->mvs are appended to _dp->blocks and their count
-   is stored in _dp->nblocks; every entry chosen lies in a row above _vy.
-  Nothing is appended when _vy<=2.
-  At level>=3 the entries one mvb_sz above are used directly; at lower levels
-   the valid flags in state->mv_grid select a full, half, or quarter mvb_sz
-   offset and add further entries across invalid grid points.
-  Note that + and - bind tighter than the shifts below: nhmvbs is
-   (state->nhmbs+1)<<2 and log_mvb_sz is (4-level)>>1.
-  _est: The motion estimation context; supplies mvs and the encoder state.
-  _dp:  The DP node whose blocks and nblocks fields are filled in.
-  _vx:  The horizontal MV grid index of the node.
-  _vy:  The vertical MV grid index of the node.*/
-static void od_mv_dp_first_col_block_setup(od_mv_est_ctx *_est,
- od_mv_dp_node *_dp,int _vx,int _vy){
-  od_state *state;
-  int       nhmvbs;
-  int       level;
-  int       log_mvb_sz;
-  int       mvb_sz;
-  int       nblocks;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1;
-  mvb_sz=1<<log_mvb_sz;
-  nblocks=0;
-  if(_vy>2){
-    if(level>=3){
-      if(_vx>=mvb_sz)_dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-      if(_vx<=nhmvbs-mvb_sz)_dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx;
-    }
-    else{
-      int half_mvb_sz;
-      int mvb_off;
-      half_mvb_sz=mvb_sz>>1;
-      if(_vx>=mvb_sz){
-        if(state->mv_grid[_vy-half_mvb_sz][_vx-half_mvb_sz].valid){
-          if(level>0||
-           !state->mv_grid[_vy-(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-            mvb_off=half_mvb_sz;
-          }
-          else mvb_off=half_mvb_sz>>1;
-          _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx-mvb_off;
-          if(!state->mv_grid[_vy][_vx-mvb_off].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx-(mvb_off<<1);
-          }
-          if(!state->mv_grid[_vy-mvb_off][_vx].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx-mvb_off;
-          }
-        }
-        else _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-      }
-      if(_vx<=nhmvbs-mvb_sz){
-        if(state->mv_grid[_vy-half_mvb_sz][_vx+half_mvb_sz].valid){
-          if(level>0||
-           !state->mv_grid[_vy-(half_mvb_sz>>1)][_vx+(half_mvb_sz>>1)].valid){
-            mvb_off=half_mvb_sz;
-          }
-          else mvb_off=half_mvb_sz>>1;
-          _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx;
-          if(!state->mv_grid[_vy][_vx+mvb_off].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx+mvb_off;
-          }
-          if(!state->mv_grid[_vy-mvb_off][_vx].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx;
-          }
-        }
-        else _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx;
-      }
-    }
-  }
-  _dp->nblocks=nblocks;
-}
-/*Builds the block list for a column DP node; this is the column counterpart
-   of od_mv_dp_prev_row_block_setup(), with the roles of _vx and _vy swapped.
-  Pointers to entries of _est->mvs are appended to _dp->blocks and their count
-   is stored in _dp->nblocks; every entry chosen lies in a row above _vy.
-  Unlike od_mv_dp_first_col_block_setup(), there is no _vy>2 guard.
-  Extra entries are appended when the level at row _vy-mvb_sz gives a larger
-   log_mvb_sz than this node's (prev_log_mvb_sz>log_mvb_sz) and the diagonal
-   grid point one mvb_sz away is invalid, and, in the half/quarter offset
-   branches, when a second grid point further up is invalid as well.
-  Note that + and - bind tighter than the shifts and & below: nhmvbs is
-   (state->nhmbs+1)<<2, log_mvb_sz is (4-level)>>1, and the prev_level row
-   index is (_vy-mvb_sz)&3.
-  _est: The motion estimation context; supplies mvs and the encoder state.
-  _dp:  The DP node whose blocks and nblocks fields are filled in.
-  _vx:  The horizontal MV grid index of the node.
-  _vy:  The vertical MV grid index of the node.*/
-static void od_mv_dp_prev_col_block_setup(od_mv_est_ctx *_est,
- od_mv_dp_node *_dp,int _vx,int _vy){
-  od_state *state;
-  int       nhmvbs;
-  int       level;
-  int       prev_level;
-  int       log_mvb_sz;
-  int       prev_log_mvb_sz;
-  int       mvb_sz;
-  int       nblocks;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1;
-  mvb_sz=1<<log_mvb_sz;
-  prev_level=OD_MC_LEVEL[_vy-mvb_sz&3][_vx&3];
-  prev_log_mvb_sz=4-prev_level>>1;
-  nblocks=0;
-  if(level>=3){
-    if(_vx>=mvb_sz){
-      _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-      if(prev_log_mvb_sz>log_mvb_sz&&
-       !state->mv_grid[_vy-mvb_sz][_vx-mvb_sz].valid){
-        _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-(mvb_sz<<1);
-      }
-    }
-    if(_vx<=nhmvbs-mvb_sz){
-      _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx;
-      if(prev_log_mvb_sz>log_mvb_sz&&
-       !state->mv_grid[_vy-mvb_sz][_vx+mvb_sz].valid){
-        _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx+mvb_sz;
-      }
-    }
-  }
-  else{
-    int half_mvb_sz;
-    int mvb_off;
-    half_mvb_sz=mvb_sz>>1;
-    if(_vx>=mvb_sz){
-      if(state->mv_grid[_vy-half_mvb_sz][_vx-half_mvb_sz].valid){
-        if(level>0||
-         !state->mv_grid[_vy-(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-          mvb_off=half_mvb_sz;
-        }
-        else mvb_off=half_mvb_sz>>1;
-        _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx-mvb_off;
-        if(!state->mv_grid[_vy][_vx-mvb_off].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx-(mvb_off<<1);
-        }
-        if(!state->mv_grid[_vy-mvb_off][_vx].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx-mvb_off;
-          if(!state->mv_grid[_vy-(mvb_off<<1)][_vx-mvb_off].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx-(mvb_off<<1);
-          }
-        }
-      }
-      else{
-        _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-        if(prev_log_mvb_sz>log_mvb_sz&&
-         !state->mv_grid[_vy-mvb_sz][_vx-mvb_sz].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-(mvb_sz<<1);
-        }
-      }
-    }
-    if(_vx<=nhmvbs-mvb_sz){
-      if(state->mv_grid[_vy-half_mvb_sz][_vx+half_mvb_sz].valid){
-        if(level>0||
-         !state->mv_grid[_vy-(half_mvb_sz>>1)][_vx+(half_mvb_sz>>1)].valid){
-          mvb_off=half_mvb_sz;
-        }
-        else mvb_off=half_mvb_sz>>1;
-        _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx;
-        if(!state->mv_grid[_vy][_vx+mvb_off].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx+mvb_off;
-        }
-        if(!state->mv_grid[_vy-mvb_off][_vx].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx;
-          if(!state->mv_grid[_vy-(mvb_off<<1)][_vx+mvb_off].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx+mvb_off;
-          }
-        }
-      }
-      else{
-        _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx;
-        if(prev_log_mvb_sz>log_mvb_sz&&
-         !state->mv_grid[_vy-mvb_sz][_vx+mvb_sz].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx+mvb_sz;
-        }
-      }
-    }
-  }
-  _dp->nblocks=nblocks;
-}
-/*Builds the block list for a column DP node; this is the column counterpart
-   of od_mv_dp_last_row_block_setup(), with the roles of _vx and _vy swapped.
-  Pointers to entries of _est->mvs are appended to _dp->blocks and their count
-   is stored in _dp->nblocks; every entry chosen lies in row _vy or below it.
-  At level>=3 the entries in row _vy are used directly; at lower levels the
-   valid flags in state->mv_grid select a full, half, or quarter mvb_sz offset
-   and add further entries across invalid grid points.
-  Note that + and - bind tighter than the shifts below: nhmvbs is
-   (state->nhmbs+1)<<2 and log_mvb_sz is (4-level)>>1.
-  _est: The motion estimation context; supplies mvs and the encoder state.
-  _dp:  The DP node whose blocks and nblocks fields are filled in.
-  _vx:  The horizontal MV grid index of the column.
-  _vy:  The vertical MV grid index used to select the entries.*/
-static void od_mv_dp_last_col_block_setup(od_mv_est_ctx *_est,
- od_mv_dp_node *_dp,int _vx,int _vy){
-  od_state *state;
-  int       nhmvbs;
-  int       level;
-  int       log_mvb_sz;
-  int       mvb_sz;
-  int       nblocks;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1;
-  mvb_sz=1<<log_mvb_sz;
-  nblocks=0;
-  if(level>=3){
-    if(_vx>=mvb_sz)_dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_sz;
-    if(_vx<=nhmvbs-mvb_sz)_dp->blocks[nblocks++]=_est->mvs[_vy]+_vx;
-  }
-  else{
-    int half_mvb_sz;
-    int mvb_off;
-    half_mvb_sz=mvb_sz>>1;
-    if(_vx>=mvb_sz){
-      if(state->mv_grid[_vy+half_mvb_sz][_vx-half_mvb_sz].valid){
-        if(level>0||
-         !state->mv_grid[_vy+(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-          mvb_off=half_mvb_sz;
-        }
-        else mvb_off=half_mvb_sz>>1;
-        _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_off;
-        if(!state->mv_grid[_vy][_vx-mvb_off].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-(mvb_off<<1);
-        }
-        if(!state->mv_grid[_vy+mvb_off][_vx].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_off]+_vx-mvb_off;
-        }
-      }
-      else _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_sz;
-    }
-    if(_vx<=nhmvbs-mvb_sz){
-      if(state->mv_grid[_vy+half_mvb_sz][_vx+half_mvb_sz].valid){
-        if(level>0||
-         !state->mv_grid[_vy+(half_mvb_sz>>1)][_vx+(half_mvb_sz>>1)].valid){
-          mvb_off=half_mvb_sz;
-        }
-        else mvb_off=half_mvb_sz>>1;
-        _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx;
-        if(!state->mv_grid[_vy][_vx+mvb_off].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx+mvb_off;
-        }
-        if(!state->mv_grid[_vy+mvb_off][_vx].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_off]+_vx;
-        }
-      }
-      else _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx;
-    }
-  }
-  _dp->nblocks=nblocks;
-}
-
-/*Undoes the changes made along a column DP chain.
-  Starting at _dp and stepping backwards one node at a time, each node's MV,
-   MV rate, and down edge label are reset from the node's original_* fields,
-   as are the rates of the changeable MVs it predicts.
-  The walk ends after the node whose first state has no predecessor
-   (prevsi<0).
-  _dp: The last node of the chain to restore.*/
-static void od_mv_dp_restore_col_state(od_mv_dp_node *_dp){
-  for(;;_dp--){
-    od_mv_grid_pt *grid_pt;
-    int            i;
-    /*Put back this node's own MV state first.*/
-    _dp->mv->mv_rate=_dp->original_mv_rate;
-    grid_pt=_dp->mvg;
-    grid_pt->mv[0]=_dp->original_mv[0];
-    grid_pt->mv[1]=_dp->original_mv[1];
-    grid_pt->down=_dp->original_etype;
-    /*Then the rates of the MVs it helped to predict.*/
-    for(i=0;i<_dp->npred_changeable;i++){
-      _dp->predicted_mvs[i]->mv_rate=_dp->original_mv_rates[i];
-    }
-    /*The first node of the chain has no predecessor state: stop there.*/
-    if(_dp->states[0].prevsi<0)break;
-  }
-}
-
-/*Writes the chosen path through a column DP chain back into the MV grid.
-  The rates of predicted MVs may have been updated several times along the
-   trellis, so they have to be installed walking FORWARDS.
-  A first, backwards pass therefore flips each prevsi link on the path into a
-   link to the following state, setting the down edge labels on the way
-   (a state index >= nstates selects label 1, otherwise label 0).
-  A second, forwards pass then installs MVs, MV rates, block SADs, and the
-   rates of the predicted MVs.
-  _dp:     One past the last node on the path.
-  _prevsi: The state index chosen in the node before _dp (plus that node's
-            nstates if its down edge label is 1), or a negative value to
-            install nothing.*/
-static void od_mv_dp_install_col_state(od_mv_dp_node *_dp,int _prevsi){
-  od_mv_dp_node *node;
-  int            fwd_si;
-  int            cur_si;
-  node=_dp;
-  fwd_si=-1;
-  cur_si=_prevsi;
-  /*Backwards pass: set the edge labels and reverse the links.*/
-  while(cur_si>=0){
-    int back_si;
-    int is_v;
-    node--;
-    is_v=cur_si>=node->nstates;
-    node->mvg->down=is_v;
-    if(is_v)cur_si-=node->nstates;
-    back_si=node->states[cur_si].prevsi;
-    node->states[cur_si].prevsi=fwd_si;
-    fwd_si=cur_si;
-    cur_si=back_si;
-  }
-  /*Forwards pass: install everything else along the reversed links.*/
-  cur_si=fwd_si;
-  while(node<_dp){
-    od_mv_grid_pt *grid_pt;
-    int            i;
-    /*This node's own MV and rate.*/
-    node->mv->mv_rate=node->states[cur_si].mv_rate;
-    grid_pt=node->mvg;
-    grid_pt->mv[0]=node->states[cur_si].mv[0];
-    grid_pt->mv[1]=node->states[cur_si].mv[1];
-    /*The SADs of the blocks attached to this node.*/
-    for(i=0;i<node->nblocks;i++){
-      node->blocks[i]->sad=node->states[cur_si].block_sads[i];
-    }
-    /*The rates of the MVs this node predicts.*/
-    for(i=0;i<node->npredicted;i++){
-      node->predicted_mvs[i]->mv_rate=node->states[cur_si].pred_mv_rates[i];
-    }
-    cur_si=node->states[cur_si].prevsi;
-    node++;
-  }
-}
-/*Refines the MVs in one column of the MV grid with a dynamic program.
-  The column is handled as a sequence of paths, each starting at the next
-   valid grid point and continuing downwards until the end of the column or a
-   gap is reached.
-  Each path is searched as a trellis whose states are candidate MVs for a grid
-   point (the current MV plus up to nsites displaced candidates) combined with
-   the "down" edge label of the previous grid point (0 is tried when
-   OD_MC_USEB is set in _est->flags, 1 when OD_MC_USEV is set).
-  The best path is installed unless its cost is positive, in which case the
-   original state is restored instead.
-  _est:            The motion estimation context.
-  _ref:            The reference frame; passed through to the SAD routine.
-  _vx:             The column of the MV grid to refine.
-  _log_dsz:        The log base 2 of the candidate displacement.
-  _mv_res:         The MV resolution; passed through to the rate routine.
-  _pattern_nsites: The number of candidate sites for each boundary case.
-  _pattern:        The candidate site indices for each boundary case.
-  Return: The sum of the best costs of the paths that were installed.
-  NOTE(review): The local mv_res is assigned but never read; every call below
-   uses _mv_res instead.*/
-static ogg_int32_t od_mv_est_refine_col(od_mv_est_ctx *_est,int _ref,int _vx,
- int _log_dsz,int _mv_res,const int *_pattern_nsites,
- const od_pattern *_pattern){
-  od_state        *state;
-  od_mv_grid_pt  **grid;
-  od_mv_grid_pt   *pmvg;
-  od_mv_grid_pt   *mvg;
-  od_mv_dp_node   *dp_node;
-  od_mv_dp_state  *cstate;
-  od_mv_dp_state  *pstate;
-  ogg_int32_t      dcost;
-  int              nhmvbs;
-  int              nvmvbs;
-  int              mv_res;
-  int              level;
-  int              log_mvb_sz;
-  int              mvb_sz;
-  int              labels_only;
-  int              nsites;
-  int              sitei;
-  int              site;
-  int              curx;
-  int              cury;
-  int              vy;
-  int              b;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  labels_only=_vx<2||_vx>nhmvbs-2;
-  mv_res=OD_MINI(_log_dsz,2);
-  grid=state->mv_grid;
-  dcost=0;
-  /*fprintf(stderr,"Refining column %i (%i)...\n",_vx,_vx-2<<2);*/
-  for(vy=0;;vy++){
-    ogg_int32_t block_sads[18][8];
-    ogg_int32_t best_cost;
-    ogg_int32_t cost;
-    ogg_int32_t best_dd;
-    ogg_int32_t dd;
-    int         cur_mv_rates[9];
-    int         pred_mv_rates[9][17];
-    int         best_dr;
-    int         dr;
-    int         best_si;
-    int         si;
-    int         has_gap;
-    for(;vy<=nvmvbs&&!grid[vy][_vx].valid;vy++);
-    if(vy>nvmvbs)break;
-    level=OD_MC_LEVEL[vy&3][_vx&3];
-    /*fprintf(stderr,"Starting DP at vertex %i (%i), level %i\n",
-     vy,vy-2<<2,level);*/
-    log_mvb_sz=4-level>>1;
-    mvb_sz=1<<log_mvb_sz;
-    mvg=grid[vy]+_vx;
-    curx=mvg->mv[0];
-    cury=mvg->mv[1];
-    dp_node=_est->dp_nodes;
-    od_mv_dp_col_init(_est,dp_node,_vx,vy,NULL);
-    od_mv_dp_first_col_block_setup(_est,dp_node,_vx,vy);
-    /*fprintf(stderr,"TESTING block SADs:\n");
-    od_mv_dp_get_sad_change8(_est,_ref,dp_node,block_sads[0]);*/
-    /*Compute the set of states for the first node.
-      State 0 always uses site 4; the remaining nsites candidates come from
-       _pattern[b].
-      Columns flagged labels_only, and paths starting at vy<2, get no extra
-       candidates.*/
-    if(vy>=2&&!labels_only){
-      b=od_mv_est_get_boundary_case(state,_vx,vy,curx,cury,
-       1<<_log_dsz,log_mvb_sz+2);
-      nsites=_pattern_nsites[b];
-    }
-    else b=nsites=0;
-    for(sitei=0,site=4;;sitei++){
-      cstate=dp_node[0].states+sitei;
-      cstate->mv[0]=curx+(OD_SQUARE_DX[site]<<_log_dsz);
-      cstate->mv[1]=cury+(OD_SQUARE_DY[site]<<_log_dsz);
-      cstate->prevsi=-1;
-      mvg->mv[0]=cstate->mv[0];
-      mvg->mv[1]=cstate->mv[1];
-      cstate->dr=od_mv_dp_get_rate_change(state,dp_node,
-       &cstate->mv_rate,cstate->pred_mv_rates,-1,_mv_res);
-      cstate->dd=od_mv_dp_get_sad_change8(_est,_ref,dp_node,
-       cstate->block_sads);
-      /*fprintf(stderr,"State: %i  dr: %i  dd: %i  dopt: %i\n",
-       sitei,cstate->dr,cstate->dd,
-       cstate->dr*_est->lambda+(cstate->dd<<OD_LAMBDA_SCALE));*/
-      if(sitei>=nsites)break;
-      site=_pattern[b][sitei];
-    }
-    dp_node[0].nstates=nsites+1;
-    has_gap=0;
-    pmvg=mvg;
-    for(;vy<nvmvbs;){
-      /*Find the next available MV to advance to.*/
-      if(level&1){
-        if(!grid[vy+mvb_sz][_vx].valid){
-          /*fprintf(stderr,"Gap found at %i (%i), stopping\n",vy,vy-2<<2);*/
-          has_gap=1;
-          break;
-        }
-        else if(level>=3)vy++;
-        else if(!grid[vy+1][_vx].valid)vy+=mvb_sz;
-        else vy++;
-      }
-      else if(level>=4)vy++;
-      else if(!grid[vy+(mvb_sz>>1)][_vx].valid)vy+=mvb_sz;
-      else if(level>=2||!grid[vy+1][_vx].valid)vy+=mvb_sz>>1;
-      else vy++;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-      if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-        od_mv_dp_restore_col_state(dp_node);
-        od_mv_dp_animate_state(state,_ref,dp_node,0);
-      }
-#endif
-      level=OD_MC_LEVEL[vy&3][_vx&3];
-      /*fprintf(stderr,"Continuing DP at vertex %i (%i), level %i\n",
-       vy,vy-2<<2,level);*/
-      log_mvb_sz=4-level>>1;
-      mvb_sz=1<<log_mvb_sz;
-      mvg=grid[vy]+_vx;
-      curx=mvg->mv[0];
-      cury=mvg->mv[1];
-      od_mv_dp_col_init(_est,dp_node+1,_vx,vy,dp_node);
-      od_mv_dp_prev_col_block_setup(_est,dp_node+1,_vx,vy);
-      /*fprintf(stderr,"TESTING block SADs:\n");
-      pmvg->mv[0]=dp_node[0].original_mv[0];
-      pmvg->mv[1]=dp_node[0].original_mv[1];
-      od_mv_dp_get_sad_change8(_est,_ref,dp_node+1,block_sads[0]);*/
-      /*Compute the set of states for this node.*/
-      if(vy<=nvmvbs-2&&!labels_only){
-        b=od_mv_est_get_boundary_case(state,_vx,vy,curx,cury,
-         1<<_log_dsz,log_mvb_sz+2);
-        nsites=_pattern_nsites[b];
-      }
-      else nsites=0;
-      for(sitei=0,site=4;;sitei++){
-        cstate=dp_node[1].states+sitei;
-        cstate->mv[0]=curx+(OD_SQUARE_DX[site]<<_log_dsz);
-        cstate->mv[1]=cury+(OD_SQUARE_DY[site]<<_log_dsz);
-        best_si=pmvg->down?dp_node[0].nstates:0;
-        best_dr=dp_node[0].states[0].dr;
-        best_dd=dp_node[0].states[0].dd;
-        best_cost=INT_MAX;
-        mvg->mv[0]=cstate->mv[0];
-        mvg->mv[1]=cstate->mv[1];
-        for(si=0;si<dp_node[0].nstates;si++){
-          pstate=dp_node[0].states+si;
-          /*Get the rate change for this state using previous state si.
-            This automatically loads the required bits of the trellis path into
-             the grid, like the previous MV.*/
-          cstate->dr=od_mv_dp_get_rate_change(state,dp_node+1,
-            cur_mv_rates+si,pred_mv_rates[si],si,_mv_res);
-          /*Test against the previous state with a B edge.*/
-          if(_est->flags&OD_MC_USEB){
-            pmvg->down=0;
-            dr=pstate->dr+cstate->dr;
-            /*Account for label mismatch.*/
-            /*if(pstate->prevsi>=0&&pstate->prevsi>=(dp_node-1)->nstates)dr+=2;*/
-            dd=pstate->dd+od_mv_dp_get_sad_change8(_est,_ref,dp_node+1,
-             block_sads[si]);
-            cost=dr*_est->lambda+(dd<<OD_LAMBDA_SCALE);
-            /*fprintf(stderr,"State: %2i  P.State: %iB  dr: %3i  dd: %6i  dopt: %7i\n",
-             sitei,si,dr,dd,cost);*/
-            if(cost<best_cost){
-              best_si=si;
-              best_cost=cost;
-              best_dd=dd;
-              best_dr=dr;
-            }
-          }
-          if(_est->flags&OD_MC_USEV){
-            /*Test against the previous state with a V edge.*/
-            pmvg->down=1;
-            dr=pstate->dr+cstate->dr;
-            /*Account for label mismatch.*/
-            /*if(pstate->prevsi>=0&&pstate->prevsi<(dp_node-1)->nstates)rate+=2;*/
-            dd=pstate->dd+od_mv_dp_get_sad_change8(_est,_ref,dp_node+1,
-             block_sads[si+dp_node[0].nstates]);
-            cost=dr*_est->lambda+(dd<<OD_LAMBDA_SCALE);
-            /*fprintf(stderr,"State: %2i  P.State: %iV  dr: %3i  dd: %6i  dopt: %7i\n",
-             sitei,si,dr,dd,cost);*/
-            if(cost<best_cost){
-              best_si=si+dp_node[0].nstates;
-              best_cost=cost;
-              best_dd=dd;
-              best_dr=dr;
-            }
-          }
-        }
-        /*fprintf(stderr,"State: %2i  Best P.State: %i%c\n",
-         sitei,best_si%dp_node[0].nstates,best_si>=dp_node[0].nstates?'V':'B');*/
-        cstate->prevsi=best_si;
-        cstate->dr=best_dr;
-        cstate->dd=best_dd;
-        memcpy(cstate->block_sads,block_sads[cstate->prevsi],
-         sizeof(block_sads[0][0])*dp_node[1].nblocks);
-        if(best_si<dp_node[0].nstates){
-          cstate->mv_rate=cur_mv_rates[best_si];
-          memcpy(cstate->pred_mv_rates,pred_mv_rates[best_si],
-           sizeof(pred_mv_rates[0][0])*dp_node[1].npredicted);
-        }
-        else{
-          cstate->mv_rate=cur_mv_rates[best_si-dp_node[0].nstates];
-          memcpy(cstate->pred_mv_rates,
-           pred_mv_rates[best_si-dp_node[0].nstates],
-           sizeof(pred_mv_rates[0][0])*dp_node[1].npredicted);
-        }
-        if(sitei>=nsites)break;
-        site=_pattern[b][sitei];
-      }
-      dp_node[1].nstates=nsites+1;
-      dp_node++;
-      pmvg=mvg;
-    }
-    /*fprintf(stderr,"Finished DP at vertex %i (%i)\n",
-     dp_node[0].mv->vy,dp_node[0].mv->vy-2<<2);*/
-    best_si=pmvg->down?dp_node[0].nstates:0;
-    best_cost=INT_MAX;
-    /*TODO: Once we stop optimizing at arbitrary places, we'll need to
-       compute the rate change of MVs we didn't get to.*/
-    dp_node[1].npredicted=dp_node[1].npred_changeable=0;
-    if(dp_node[0].mv->vy<nvmvbs-2){
-      od_mv_dp_last_col_block_setup(_est,dp_node+1,_vx,dp_node[0].mv->vy);
-      /*fprintf(stderr,"TESTING block SADs:\n");
-      pmvg->mv[0]=dp_node[0].original_mv[0];
-      pmvg->mv[1]=dp_node[0].original_mv[1];
-      od_mv_dp_get_sad_change8(_est,_ref,dp_node+1,block_sads[0]);*/
-      for(si=0;si<dp_node[0].nstates;si++){
-        pstate=dp_node[0].states+si;
-        pmvg->mv[0]=pstate->mv[0];
-        pmvg->mv[1]=pstate->mv[1];
-        /*Test against the state with a following B edge.*/
-        if(_est->flags&OD_MC_USEB){
-          pmvg->down=0;
-          dr=pstate->dr;
-          /*Account for label mismatch.*/
-          /*if(!has_gap&&pstate->prevsi>=0&&pstate->prevsi>=(dp_node-1)->nstates){
-            rate+=2;
-          }*/
-          dd=pstate->dd+od_mv_dp_get_sad_change8(_est,_ref,dp_node+1,
-           block_sads[si]);
-          cost=dr*_est->lambda+(dd<<OD_LAMBDA_SCALE);
-          /*fprintf(stderr,"State: --  P.State: %iB  dr: %3i  dd: %6i  dopt: %7i\n",
-           si,dr,dd,cost);*/
-          if(best_si<0||cost<best_cost){
-            best_si=si;
-            best_cost=cost;
-          }
-        }
-        /*Test against the state with a following V edge.
-          If we hit a gap, then the edge label does not actually affect
-           anything, so we can skip these tests if we did the ones above.*/
-        if((_est->flags&OD_MC_USEV)&&(!has_gap||!(_est->flags&OD_MC_USEB))){
-          pmvg->down=1;
-          dr=pstate->dr;
-          /*Account for label mismatch.*/
-          /*if(pstate->prevsi>=0&&pstate->prevsi<(dp_node-1)->nstates)rate+=2;*/
-          dd=pstate->dd+od_mv_dp_get_sad_change8(_est,_ref,dp_node+1,
-           block_sads[si+dp_node[0].nstates]);
-          cost=dr*_est->lambda+(dd<<OD_LAMBDA_SCALE);
-          /*fprintf(stderr,"State: --  P.State: %iV  dr: %3i  dd: %6i  dopt: %7i\n",
-           si,dr,dd,cost);*/
-          if(cost<best_cost){
-            best_si=si+dp_node[0].nstates;
-            best_cost=cost;
-          }
-        }
-      }
-    }
-    /*There are no blocks to accumulate SAD after this one, so pick the best
-       state so far.
-      The existing edge label is kept: the chosen index is offset by nstates
-       afterwards when pmvg->down is set.*/
-    else{
-      dp_node[1].nblocks=0;
-      for(si=0;si<dp_node[0].nstates;si++){
-        dp_node[1].nblocks=0;
-        pstate=dp_node[0].states+si;
-        cost=pstate->dr*_est->lambda+(pstate->dd<<OD_LAMBDA_SCALE);
-        if(best_si<0||cost<best_cost){
-          best_si=si;
-          best_cost=cost;
-        }
-      }
-      if(pmvg->down)best_si+=dp_node[0].nstates;
-    }
-    /*fprintf(stderr,"Best P.State: %i%c dopt: %7i\n",
-     best_si%dp_node[0].nstates,best_si>=dp_node[0].nstates?'V':'B',best_cost);*/
-    if(best_cost>0){
-      /*Our optimal path is worse than what we started with!
-        Restore the original state and give up.*/
-      fprintf(stderr,"Best cost (%7i) > 0! Optimization failed.\n",best_cost);
-      od_mv_dp_restore_col_state(dp_node);
-    }
-    else{
-      int bi;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-      if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-        char iter_label[16];
-        od_mv_dp_restore_col_state(dp_node);
-        od_mv_dp_animate_state(state,_ref,dp_node,0);
-        od_mv_dp_install_col_state(dp_node+1,best_si);
-        od_state_mc_predict(state,_ref);
-        od_state_fill_vis(state);
-        sprintf(iter_label,"ani%08i",state->ani_iter++);
-        od_state_dump_img(state,&state->vis_img,iter_label);
-      }
-#endif
-      /*Update the state along the optimal path.*/
-      od_mv_dp_install_col_state(dp_node+1,best_si);
-      /*Store the SADs from this last node, too.*/
-      for(bi=0;bi<dp_node[1].nblocks;bi++){
-        dp_node[1].blocks[bi]->sad=block_sads[best_si][bi];
-      }
-      dcost+=best_cost;
-    }
-  }
-  od_mv_est_check_rd_state(_est,_ref,_mv_res);
-  return dcost;
-}
-/*The code between the #if 0 below and its matching #endif is compiled out.
-  This first disabled function fills _dp->blocks, via od_mv_dp_setup_block(),
-   with blocks to the left and right of grid point (_vx,_vy) whose
-   coordinates lie above row _vy, and stores their count in _dp->nblocks.
-  The left side is only used when _vx>=mvb_sz, and the right side only when
-   _vx<=nhmvbs-mvb_sz.
-  For levels below 3, a side is covered by smaller blocks (two or three of
-   them) when the grid point half a block up and to that side is valid, and
-   by a single full-size block otherwise.*/
-#if 0
-static void od_mv_dp_first_col_block_setup(od_state *_state,od_mv_dp_node *_dp,
- int _vx,int _vy){
-  int nhmvbs;
-  int level;
-  int log_mvb_sz;
-  int mvb_sz;
-  int nblocks;
-  nhmvbs=_state->nhmbs+1<<2;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1;
-  mvb_sz=1<<log_mvb_sz;
-  nblocks=0;
-  if(level>=3){
-    if(_vx>=mvb_sz){
-      od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-       _vx-mvb_sz,_vy-mvb_sz,log_mvb_sz);
-    }
-    if(_vx<=nhmvbs-mvb_sz){
-      od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-       _vx,_vy-mvb_sz,log_mvb_sz);
-    }
-  }
-  else{
-    int half_mvb_sz;
-    int log_mvb_off;
-    int mvb_off;
-    half_mvb_sz=mvb_sz>>1;
-    if(_vx>=mvb_sz){
-      if(_state->mv_grid[_vy-half_mvb_sz][_vx-half_mvb_sz].valid){
-        if(level>0||
-         !_state->mv_grid[_vy-(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-          log_mvb_off=log_mvb_sz-1;
-          mvb_off=half_mvb_sz;
-        }
-        else{
-          mvb_off=half_mvb_sz>>1;
-          log_mvb_off=log_mvb_sz-2;
-        }
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx-mvb_off,_vy-mvb_off,log_mvb_off);
-        /*No need to test _state->mv_grid[_vy-mvb_off][_vx].
-          If it was valid, we wouldn't be here.*/
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx-mvb_off,_vy-(mvb_off<<1),log_mvb_off);
-        if(!_state->mv_grid[_vy][_vx-mvb_off].valid){
-          od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-           _vx-(mvb_off<<1),_vy-mvb_off,log_mvb_off);
-        }
-      }
-      else{
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx-mvb_sz,_vy-mvb_sz,log_mvb_sz);
-      }
-    }
-    if(_vx<=nhmvbs-mvb_sz){
-      if(_state->mv_grid[_vy-half_mvb_sz][_vx+half_mvb_sz].valid){
-        if(level>0||
-         !_state->mv_grid[_vy-(half_mvb_sz>>1)][_vx+(half_mvb_sz>>1)].valid){
-          log_mvb_off=log_mvb_sz-1;
-          mvb_off=half_mvb_sz;
-        }
-        else{
-          mvb_off=half_mvb_sz>>1;
-          log_mvb_off=log_mvb_sz-2;
-        }
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx,_vy-mvb_off,log_mvb_off);
-        /*No need to test _state->mv_grid[_vy-mvb_off][_vx].
-          If it was valid, we wouldn't be here.*/
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx,_vy-(mvb_off<<1),log_mvb_off);
-        if(!_state->mv_grid[_vy][_vx+mvb_off].valid){
-          od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-           _vx+mvb_off,_vy-mvb_off,log_mvb_off);
-        }
-      }
-      else{
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx,_vy-mvb_sz,log_mvb_sz);
-      }
-    }
-  }
-  _dp->nblocks=nblocks;
-}
-/*Disabled (inside #if 0): fills _dp->blocks, via od_mv_dp_setup_block(),
-   with blocks to the left and right of grid point (_vx,_vy) whose
-   coordinates lie above row _vy, and stores their count in _dp->nblocks.
-  The left side is only used when _vx>=mvb_sz, and the right side only when
-   _vx<=nhmvbs-mvb_sz.
-  For levels below 3, a side is covered by one to four smaller blocks when
-   the grid point half a block up and to that side is valid; the extra blocks
-   are added only where the neighboring grid points tested are not valid.
-  Otherwise the side is covered by a single full-size block.*/
-static void od_mv_dp_prev_col_block_setup(od_state *_state,od_mv_dp_node *_dp,
- int _vx,int _vy){
-  int nhmvbs;
-  int level;
-  int log_mvb_sz;
-  int mvb_sz;
-  int nblocks;
-  nhmvbs=_state->nhmbs+1<<2;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1;
-  mvb_sz=1<<log_mvb_sz;
-  nblocks=0;
-  if(level>=3){
-    if(_vx>=mvb_sz){
-      od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-       _vx-mvb_sz,_vy-mvb_sz,log_mvb_sz);
-    }
-    if(_vx<=nhmvbs-mvb_sz){
-      od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-       _vx,_vy-mvb_sz,log_mvb_sz);
-    }
-  }
-  else{
-    int half_mvb_sz;
-    int log_mvb_off;
-    int mvb_off;
-    half_mvb_sz=mvb_sz>>1;
-    if(_vx>=mvb_sz){
-      if(_state->mv_grid[_vy-half_mvb_sz][_vx-half_mvb_sz].valid){
-        if(level>0||
-         !_state->mv_grid[_vy-(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-          log_mvb_off=log_mvb_sz-1;
-          mvb_off=half_mvb_sz;
-        }
-        else{
-          mvb_off=half_mvb_sz>>1;
-          log_mvb_off=log_mvb_sz-2;
-        }
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx-mvb_off,_vy-mvb_off,log_mvb_off);
-        if(!_state->mv_grid[_vy][_vx-mvb_off].valid){
-          od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-           _vx-(mvb_off<<1),_vy-mvb_off,log_mvb_off);
-        }
-        if(!_state->mv_grid[_vy-mvb_off][_vx].valid){
-          od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-           _vx-mvb_off,_vy-(mvb_off<<1),log_mvb_off);
-          if(!_state->mv_grid[_vy-(mvb_off<<1)][_vx-mvb_off].valid){
-            od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-             _vx-(mvb_off<<1),_vy-(mvb_off<<1),log_mvb_off);
-          }
-        }
-      }
-      else{
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx-mvb_sz,_vy-mvb_sz,log_mvb_sz);
-      }
-    }
-    if(_vx<=nhmvbs-mvb_sz){
-      if(_state->mv_grid[_vy-half_mvb_sz][_vx+half_mvb_sz].valid){
-        if(level>0||
-         !_state->mv_grid[_vy-(half_mvb_sz>>1)][_vx+(half_mvb_sz>>1)].valid){
-          log_mvb_off=log_mvb_sz-1;
-          mvb_off=half_mvb_sz;
-        }
-        else{
-          mvb_off=half_mvb_sz>>1;
-          log_mvb_off=log_mvb_sz-2;
-        }
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx,_vy-mvb_off,log_mvb_off);
-        if(!_state->mv_grid[_vy][_vx+mvb_off].valid){
-          od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-           _vx+mvb_off,_vy-mvb_off,log_mvb_off);
-        }
-        if(!_state->mv_grid[_vy-mvb_off][_vx].valid){
-          od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-           _vx,_vy-(mvb_off<<1),log_mvb_off);
-          if(!_state->mv_grid[_vy-(mvb_off<<1)][_vx+mvb_off].valid){
-            od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-             _vx+mvb_off,_vy-(mvb_off<<1),log_mvb_off);
-          }
-        }
-      }
-      else{
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx,_vy-mvb_sz,log_mvb_sz);
-      }
-    }
-  }
-  _dp->nblocks=nblocks;
-}
-/*Disabled (inside #if 0): fills _dp->blocks, via od_mv_dp_setup_block(),
-   with blocks to the left and right of grid point (_vx,_vy) whose
-   coordinates lie at or below row _vy, and stores their count in
-   _dp->nblocks.
-  The left side is only used when _vx>=mvb_sz, and the right side only when
-   _vx<=nhmvbs-mvb_sz.
-  For levels below 3, a side is covered by one to three smaller blocks when
-   the grid point half a block down and to that side is valid, and by a single
-   full-size block otherwise.*/
-static void od_mv_dp_last_col_block_setup(od_state *_state,od_mv_dp_node *_dp,
- int _vx,int _vy){
-  int nhmvbs;
-  int level;
-  int log_mvb_sz;
-  int mvb_sz;
-  int nblocks;
-  nhmvbs=_state->nhmbs+1<<2;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1;
-  mvb_sz=1<<log_mvb_sz;
-  nblocks=0;
-  if(level>=3){
-    if(_vx>=mvb_sz){
-      od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-       _vx-mvb_sz,_vy,log_mvb_sz);
-    }
-    if(_vx<=nhmvbs-mvb_sz){
-      od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-       _vx,_vy,log_mvb_sz);
-    }
-  }
-  else{
-    int half_mvb_sz;
-    int log_mvb_off;
-    int mvb_off;
-    half_mvb_sz=mvb_sz>>1;
-    if(_vx>=mvb_sz){
-      if(_state->mv_grid[_vy+half_mvb_sz][_vx-half_mvb_sz].valid){
-        if(level>0||
-         !_state->mv_grid[_vy+(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-          log_mvb_off=log_mvb_sz-1;
-          mvb_off=half_mvb_sz;
-        }
-        else{
-          mvb_off=half_mvb_sz>>1;
-          log_mvb_off=log_mvb_sz-2;
-        }
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx-mvb_off,_vy,log_mvb_off);
-        if(!_state->mv_grid[_vy][_vx-mvb_off].valid){
-          od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-           _vx-(mvb_off<<1),_vy,log_mvb_off);
-        }
-        if(!_state->mv_grid[_vy+mvb_off][_vx].valid){
-          od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-           _vx-mvb_off,_vy+mvb_off,log_mvb_off);
-        }
-      }
-      else{
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx-mvb_sz,_vy,log_mvb_sz);
-      }
-    }
-    if(_vx<=nhmvbs-mvb_sz){
-      if(_state->mv_grid[_vy+half_mvb_sz][_vx+half_mvb_sz].valid){
-        if(level>0||
-         !_state->mv_grid[_vy+(half_mvb_sz>>1)][_vx+(half_mvb_sz>>1)].valid){
-          log_mvb_off=log_mvb_sz-1;
-          mvb_off=half_mvb_sz;
-        }
-        else{
-          mvb_off=half_mvb_sz>>1;
-          log_mvb_off=log_mvb_sz-2;
-        }
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx,_vy,log_mvb_off);
-        if(!_state->mv_grid[_vy][_vx+mvb_off].valid){
-          od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-           _vx+mvb_off,_vy,log_mvb_off);
-        }
-        if(!_state->mv_grid[_vy+mvb_off][_vx].valid){
-          od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-           _vx,_vy+mvb_off,log_mvb_off);
-        }
-      }
-      else{
-        od_mv_dp_setup_block(_state,_dp->blocks+nblocks++,
-         _vx,_vy,log_mvb_sz);
-      }
-    }
-  }
-  _dp->nblocks=nblocks;
-}
-/*Disabled (inside #if 0) earlier version of the column refinement.
-  It walks column _vx of the MV grid and, for each path of valid grid points,
-   builds a trellis over candidate MVs (the current MV plus up to nsites
-   displaced candidates) and both "down" edge labels.
-  Each state stores an accumulated rate and SAD; 2 is added to the rate when
-   the label being tested differs from the one on the previous edge.
-  MV rates are measured against od_state_get_predictor(), which is re-run per
-   previous state when an earlier node of the path (pred_node) is the
-   predictor.
-  The grid is put back to its starting values after each node is scored, and
-   the best path is always written back at the end of each path.
-  _est:            The motion estimation context.
-  _ref:            The reference frame; passed through to od_mv_dp_sad8().
-  _vx:             The column of the MV grid to refine.
-  _log_dsz:        The log base 2 of the candidate displacement; the MV
-                    resolution used here is OD_MINI(_log_dsz,2).
-  _pattern_nsites: The number of candidate sites for each boundary case.
-  _pattern:        The candidate site indices for each boundary case.
-  NOTE(review): best_cost, best_sad and best_rate are first written on the
-   si==0 pass, where cstate->prevsi<0 short-circuits the comparison.*/
-static void od_mv_est_refine_col(od_mv_est_ctx *_est,int _ref,int _vx,
- int _log_dsz,const int *_pattern_nsites,const od_pattern *_pattern){
-  od_state        *state;
-  od_mv_grid_pt  **grid;
-  od_mv_grid_pt   *pmvg;
-  od_mv_grid_pt   *mvg;
-  od_mv_dp_node   *dp_node;
-  od_mv_dp_state  *cstate;
-  od_mv_dp_state  *pstate;
-  int              pred[2];
-  int              nhmvbs;
-  int              nvmvbs;
-  int              mv_res;
-  int              level;
-  int              log_mvb_sz;
-  int              mvb_sz;
-  int              labels_only;
-  int              nsites;
-  int              sitei;
-  int              site;
-  int              prevx;
-  int              prevy;
-  int              petype;
-  int              curx;
-  int              cury;
-  int              vy;
-  int              b;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  labels_only=_vx<2||_vx>nhmvbs-2;
-  mv_res=OD_MINI(_log_dsz,2);
-  /*fprintf(stderr,"Refining col %i (%i)...\n",_vx,_vx-2<<2);*/
-  grid=state->mv_grid;
-  for(vy=0;;vy++){
-    ogg_int32_t best_cost;
-    ogg_int32_t cost;
-    ogg_int32_t best_sad;
-    ogg_int32_t sad;
-    int         best_rate;
-    int         rate;
-    int         best_si;
-    int         si;
-    int         has_gap;
-    for(;vy<=nvmvbs&&!grid[vy][_vx].valid;vy++);
-    if(vy>nvmvbs)break;
-    level=OD_MC_LEVEL[vy&3][_vx&3];
-    /*fprintf(stderr,"Starting DP at vertex %i (%i), level %i\n",
-     vy,vy-2<<2,level);*/
-    log_mvb_sz=4-level>>1;
-    mvb_sz=1<<log_mvb_sz;
-    mvg=grid[vy]+_vx;
-    curx=mvg->mv[0];
-    cury=mvg->mv[1];
-    od_state_get_predictor(state,pred,_vx,vy,level,mv_res);
-    dp_node=_est->dp_nodes;
-    dp_node[0].vi=vy;
-    if(vy>=2&&!labels_only){
-      b=od_mv_est_get_boundary_case(state,_vx,vy,curx,cury,
-       1<<_log_dsz,log_mvb_sz+2);
-      nsites=_pattern_nsites[b];
-    }
-    else nsites=0;
-    /*Compute the set of states for the first node.*/
-    if(vy>2&&!labels_only)od_mv_dp_first_col_block_setup(state,dp_node,_vx,vy);
-    else dp_node->nblocks=0;
-    for(sitei=0,site=4;;sitei++){
-      cstate=dp_node[0].states+sitei;
-      cstate->mv[0]=curx+(OD_SQUARE_DX[site]<<_log_dsz);
-      cstate->mv[1]=cury+(OD_SQUARE_DY[site]<<_log_dsz);
-      cstate->prevsi=-1;
-      if(vy<=2||labels_only){
-        cstate->sad=0;
-        cstate->rate=0;
-      }
-      else{
-        cstate->rate=od_mv_est_bits(
-         (cstate->mv[0]>>mv_res)-pred[0],(cstate->mv[1]>>mv_res)-pred[1]);
-        mvg->mv[0]=cstate->mv[0];
-        mvg->mv[1]=cstate->mv[1];
-        cstate->sad=od_mv_dp_sad8(_est,_ref,dp_node);
-      }
-      /*fprintf(stderr,"State: %i  Rate: %i  SAD: %i\n",
-       sitei,cstate->rate,cstate->sad);*/
-      if(sitei>=nsites)break;
-      site=_pattern[b][sitei];
-    }
-    dp_node[0].nstates=nsites+1;
-    mvg->mv[0]=curx;
-    mvg->mv[1]=cury;
-    has_gap=0;
-    pmvg=mvg;
-    for(;vy<nvmvbs;){
-      od_mv_dp_node *pred_node;
-      od_mv_grid_pt *pred_mvg;
-      int            predx;
-      int            predy;
-      prevx=curx;
-      prevy=cury;
-      petype=pmvg->down;
-      if(level&1){
-        vy+=mvb_sz;
-        if(vy>nvmvbs||!grid[vy][_vx].valid){
-          /*fprintf(stderr,"Gap found at %i (%i), stopping\n",vy,vy-2<<2);*/
-          has_gap=1;
-          break;
-        }
-      }
-      else if(level>=4)vy++;
-      else if(!grid[vy+(mvb_sz>>1)][_vx].valid)vy+=mvb_sz;
-      else if(level>=2||!grid[vy+1][_vx].valid)vy+=mvb_sz>>1;
-      else vy++;
-      level=OD_MC_LEVEL[vy&3][_vx&3];
-      /*fprintf(stderr,"Continuing DP at vertex %i (%i), level %i\n",
-       vy,vy-2<<2,level);*/
-      log_mvb_sz=4-level>>1;
-      mvb_sz=1<<log_mvb_sz;
-      mvg=grid[vy]+_vx;
-      curx=mvg->mv[0];
-      cury=mvg->mv[1];
-      if((level&1)||vy<2||vy>nvmvbs-2||labels_only)pred_node=NULL;
-      else pred_node=od_mv_dp_get_pred_node(dp_node,vy-mvb_sz);
-      if(pred_node==NULL){
-        od_state_get_predictor(state,pred,_vx,vy,level,mv_res);
-      }
-      else{
-        pred_mvg=grid[pred_node->vi]+_vx;
-        predx=pred_mvg->mv[0];
-        predy=pred_mvg->mv[1];
-      }
-      dp_node[1].vi=vy;
-      if(vy<=nvmvbs-2&&!labels_only){
-        b=od_mv_est_get_boundary_case(state,_vx,vy,curx,cury,
-         1<<_log_dsz,log_mvb_sz+2);
-        nsites=_pattern_nsites[b];
-      }
-      /*The first node of every 4th column has its motion vector fixed.
-        Do not move it.*/
-      else nsites=0;
-      /*Compute the set of states for this node.*/
-      od_mv_dp_prev_col_block_setup(state,dp_node+1,_vx,vy);
-      for(sitei=0,site=4;;sitei++){
-        cstate=dp_node[1].states+sitei;
-        cstate->mv[0]=curx+(OD_SQUARE_DX[site]<<_log_dsz);
-        cstate->mv[1]=cury+(OD_SQUARE_DY[site]<<_log_dsz);
-        cstate->prevsi=-1;
-        /*If no previous node is used as a predictor for this vector, compute
-           this MV's rate, once.*/
-        if(pred_node==NULL){
-          cstate->rate=od_mv_est_bits(
-           (cstate->mv[0]>>mv_res)-pred[0],(cstate->mv[1]>>mv_res)-pred[1]);
-        }
-        mvg->mv[0]=cstate->mv[0];
-        mvg->mv[1]=cstate->mv[1];
-        for(si=0;si<dp_node[0].nstates;si++){
-          pstate=dp_node[0].states+si;
-          pmvg->mv[0]=pstate->mv[0];
-          pmvg->mv[1]=pstate->mv[1];
-          if(pred_node!=NULL){
-            od_mv_dp_node *pnode;
-            int            pred_si;
-            /*Find the state in the predictor that would be active if we chose
-               this state in the current node.*/
-            pred_si=si;
-            for(pnode=dp_node;pnode!=pred_node;pnode--){
-              pred_si=pnode->states[pred_si].prevsi;
-            }
-            /*Compute the new predictor and MV rate.*/
-            pred_mvg->mv[0]=pred_node->states[pred_si].mv[0];
-            pred_mvg->mv[1]=pred_node->states[pred_si].mv[1];
-            od_state_get_predictor(state,pred,_vx,vy,level,mv_res);
-            cstate->rate=od_mv_est_bits(
-             (cstate->mv[0]>>mv_res)-pred[0],(cstate->mv[1]>>mv_res)-pred[1]);
-          }
-          /*Test against the previous state with a B edge.*/
-          pmvg->down=0;
-          rate=pstate->rate+cstate->rate;
-          /*Account for label mismatch.*/
-          if(pstate->prevsi>=0&&pstate->prevsi>=(dp_node-1)->nstates)rate+=2;
-          sad=pstate->sad+od_mv_dp_sad8(_est,_ref,dp_node+1);
-          cost=rate*_est->lambda+(sad<<OD_LAMBDA_SCALE);
-          /*fprintf(stderr,"State: %2i  P.State: %iB  Rate: %3i  SAD: %6i  Cost: %7i\n",
-           sitei,si,rate,sad,cost);*/
-          if(cstate->prevsi<0||cost<best_cost){
-            cstate->prevsi=si;
-            best_cost=cost;
-            best_sad=sad;
-            best_rate=rate;
-          }
-          /*Test against the previous state with a V edge.*/
-          pmvg->down=1;
-          rate=pstate->rate+cstate->rate;
-          /*Account for label mismatch.*/
-          if(pstate->prevsi>=0&&pstate->prevsi<(dp_node-1)->nstates)rate+=2;
-          sad=pstate->sad+od_mv_dp_sad8(_est,_ref,dp_node+1);
-          cost=rate*_est->lambda+(sad<<OD_LAMBDA_SCALE);
-          /*fprintf(stderr,"State: %2i  P.State: %iV  Rate: %3i  SAD: %6i  Cost: %7i\n",
-           sitei,si,rate,sad,cost);*/
-          if(cost<best_cost){
-            cstate->prevsi=si+dp_node[0].nstates;
-            best_cost=cost;
-            best_sad=sad;
-            best_rate=rate;
-          }
-        }
-        /*fprintf(stderr,"State: %2i  Best P.State: %i%c\n",
-         sitei,cstate->prevsi%dp_node[0].nstates,
-         cstate->prevsi>dp_node[0].nstates?'V':'B');*/
-        cstate->rate=best_rate;
-        cstate->sad=best_sad;
-        if(sitei>=nsites)break;
-        site=_pattern[b][sitei];
-      }
-      dp_node[1].nstates=nsites+1;
-      dp_node++;
-      /*Restore the state we were optimizing.*/
-      if(pred_node!=NULL){
-        pred_mvg->mv[0]=predx;
-        pred_mvg->mv[1]=predy;
-      }
-      pmvg->mv[0]=prevx;
-      pmvg->mv[1]=prevy;
-      pmvg->down=petype;
-      mvg->mv[0]=curx;
-      mvg->mv[1]=cury;
-      pmvg=mvg;
-    }
-    /*fprintf(stderr,"Finished DP at vertex %i (%i)\n",
-     dp_node[0].vi,dp_node[0].vi-2<<2);*/
-    best_si=-1;
-    if(dp_node[0].vi<nvmvbs-2){
-      od_mv_dp_last_col_block_setup(state,dp_node+1,_vx,dp_node[0].vi);
-      for(si=0;si<dp_node[0].nstates;si++){
-        pstate=dp_node[0].states+si;
-        pmvg->mv[0]=pstate->mv[0];
-        pmvg->mv[1]=pstate->mv[1];
-        /*Test against the state with a following B edge.*/
-        pmvg->down=0;
-        rate=pstate->rate;
-        /*Account for label mismatch.*/
-        if(!has_gap&&pstate->prevsi>=0&&pstate->prevsi>=(dp_node-1)->nstates){
-          rate+=2;
-        }
-        sad=pstate->sad+od_mv_dp_sad8(_est,_ref,dp_node+1);
-        cost=rate*_est->lambda+(sad<<OD_LAMBDA_SCALE);
-        /*fprintf(stderr,"State: --  P.State: %iB  Rate: %3i  SAD: %6i  Cost: %7i\n",
-         si,pstate->rate,sad,cost);*/
-        if(best_si<0||cost<best_cost){
-          best_si=si;
-          best_cost=cost;
-        }
-        /*Test against the state with a following V edge.
-          If we hit a gap, then the edge label does not actually affect anything,
-           so we can skip these tests.*/
-        if(!has_gap){
-          pmvg->down=1;
-          rate=pstate->rate;
-          /*Account for label mismatch.*/
-          if(pstate->prevsi>=0&&pstate->prevsi<(dp_node-1)->nstates)rate+=2;
-          sad=pstate->sad+od_mv_dp_sad8(_est,_ref,dp_node+1);
-          cost=rate*_est->lambda+(sad<<OD_LAMBDA_SCALE);
-          /*fprintf(stderr,"State: --  P.State: %iV  Rate: %3i  SAD: %6i  Cost: %7i\n",
-           si,pstate->rate,sad,cost);*/
-          if(cost<best_cost){
-            best_si=si+dp_node[0].nstates;
-            best_cost=cost;
-          }
-        }
-      }
-    }
-    /*There are no blocks to accumulate SAD after this one, so pick the best
-       state so far.*/
-    else for(si=0;si<dp_node[0].nstates;si++){
-      pstate=dp_node[0].states+si;
-      cost=pstate->rate*_est->lambda+(pstate->sad<<OD_LAMBDA_SCALE);
-      if(best_si<0||cost<best_cost){
-        best_si=si;
-        best_cost=cost;
-      }
-    }
-    /*fprintf(stderr,"Best P.State: %i%c\n",
-     best_si%dp_node[0].nstates,best_si>dp_node[0].nstates?'V':'B');*/
-    /*Update the MV state along the optimal path.*/
-    for(;;){
-      if(best_si>=dp_node[0].nstates){
-        pmvg->down=1;
-        best_si-=dp_node[0].nstates;
-      }
-      else pmvg->down=0;
-      pstate=dp_node[0].states+best_si;
-      pmvg->mv[0]=pstate->mv[0];
-      pmvg->mv[1]=pstate->mv[1];
-      best_si=pstate->prevsi;
-      if(best_si<0)break;
-      dp_node--;
-      pmvg=grid[dp_node[0].vi]+_vx;
-    }
-  }
-}
-#endif
-
-static ogg_int32_t od_mv_est_refine(od_mv_est_ctx *_est,int _ref,int _log_dsz,
- int _mv_res,const int *_pattern_nsites,const od_pattern *_pattern){
-  od_state    *state;
-  ogg_int32_t  dcost;
-  int          nhmvbs;
-  int          nvmvbs;
-  int          vx;
-  int          vy;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  fprintf(stderr,
-   "Refining with displacements of %0g and 1/%i pel MV resolution.\n",
-   (1<<_log_dsz)*0.125,1<<3-_mv_res);
-  dcost=0;
-  for(vy=0;vy<=nvmvbs;vy++)/*if(_est->col_counts[vy])*/{
-    dcost+=od_mv_est_refine_row(_est,_ref,vy,_log_dsz,_mv_res,
-     _pattern_nsites,_pattern);
-  }
-  for(vx=0;vx<=nhmvbs;vx++)/*if(_est->row_counts[vx])*/{
-    dcost+=od_mv_est_refine_col(_est,_ref,vx,_log_dsz,_mv_res,
-     _pattern_nsites,_pattern);
-  }
-  return dcost;
-}
-
-
-
-/*STAGE 4: Sub-pel Refinement.*/
-
-
-
-/*Stores the full-pel MVs for use by EPZS^2 in the next frame before sub-pel
-   refinement.*/
-void od_mv_est_update_fullpel_mvs(od_mv_est_ctx *_est,int _ref){
-  od_state *state;
-  int       nhmvbs;
-  int       nvmvbs;
-  int       vx;
-  int       vy;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  for(vy=2;vy<=nvmvbs-2;vy++)for(vx=2;vx<=nhmvbs-2;vx++){
-    od_mv_grid_pt *mvg;
-    od_mv_node    *mv;
-    mvg=state->mv_grid[vy]+vx;
-    if(!mvg->valid)continue;
-    mv=_est->mvs[vy]+vx;
-    mv->mvs[0][_ref][0]=mvg->mv[0]>>3;
-    mv->mvs[0][_ref][1]=mvg->mv[1]>>3;
-  }
-}
-
-/*Sets the mv_rate of each node in the mesh, using the given MV resolution.
-  Returns the change in rate.*/
-int od_mv_est_update_mv_rates(od_mv_est_ctx *_est,int _mv_res){
-  od_state *state;
-  int       nhmvbs;
-  int       nvmvbs;
-  int       vx;
-  int       vy;
-  int       dr;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  dr=0;
-  for(vy=2;vy<=nvmvbs-2;vy++)for(vx=2;vx<=nhmvbs-2;vx++){
-    od_mv_grid_pt *mvg;
-    od_mv_node    *mv;
-    int            pred[2];
-    mvg=state->mv_grid[vy]+vx;
-    if(!mvg->valid)continue;
-    mv=_est->mvs[vy]+vx;
-    od_state_get_predictor(state,pred,vx,vy,OD_MC_LEVEL[vy&3][vx&3],_mv_res);
-    dr-=mv->mv_rate;
-    mv->mv_rate=od_mv_est_bits(
-     (mvg->mv[0]>>_mv_res)-pred[0],(mvg->mv[1]>>_mv_res)-pred[1]);
-    dr+=mv->mv_rate;
-  }
-  return dr;
-}
-
-
-od_mv_est_ctx *od_mv_est_alloc(od_enc_ctx *_enc){
-  od_mv_est_ctx *ret;
-  ret=(od_mv_est_ctx *)_ogg_malloc(sizeof(od_mv_est_ctx));
-  od_mv_est_init(ret,_enc);
-  return ret;
-}
-
-void od_mv_est_free(od_mv_est_ctx *_est){
-  if(_est!=NULL){
-    od_mv_est_clear(_est);
-    _ogg_free(_est);
-  }
-}
-
-void od_mv_subpel_refine(od_mv_est_ctx *_est,int _ref,int _cost_thresh){
-  od_state       *state;
-  od_mv_grid_pt **grid;
-  ogg_int32_t     dcost;
-  ogg_int32_t     subpel_cost;
-  int             cost_thresh;
-  int             nhmvbs;
-  int             nvmvbs;
-  int             mv_res;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  cost_thresh=_cost_thresh;
-  /*Save the fullpell MVs now for use by EPZS^2 on the next frame.
-    We could also try rounding the results after refinement, I guess.
-    I'm not sure it makes much difference*/
-  od_mv_est_update_fullpel_mvs(_est,_ref);
-  do dcost=od_mv_est_refine(_est,_ref,2,2,OD_DIAMOND_NSITES,OD_DIAMOND_SITES);
-  while(dcost<cost_thresh);
-  for(mv_res=2;mv_res-->_est->mv_res_min;){
-    subpel_cost=od_mv_est_update_mv_rates(_est,mv_res)*_est->lambda;
-    /*If the rate penalty for refining is small, bump the termination threshold
-       down to make sure we actually get a decent improvement.
-      We make sure not to let it get too small, however, so we're not here all
-       day (a motion field of all (0,0)'s would have a rate penalty of 0!).*/
-    cost_thresh=OD_MAXI(cost_thresh,-OD_MAXI(subpel_cost,16<<OD_LAMBDA_SCALE));
-    memcpy(_est->refine_grid[0],state->mv_grid[0],
-     sizeof(state->mv_grid[0][0])*(nhmvbs+1)*(nvmvbs+1));
-    do{
-      dcost=od_mv_est_refine(_est,_ref,mv_res,mv_res,
-       OD_DIAMOND_NSITES,OD_DIAMOND_SITES);
-      subpel_cost+=dcost;
-    }
-    while(dcost<cost_thresh);
-    if(subpel_cost>0){
-      fprintf(stderr,"1/%i refinement FAILED:    dopt %7i\n",
-       1<<3-mv_res,subpel_cost);
-      grid=_est->refine_grid;
-      _est->refine_grid=state->mv_grid;
-      state->mv_grid=grid;
-      break;
-    }
-    else fprintf(stderr,"1/%i refinement SUCCEEDED: dopt %7i\n",
-     1<<3-mv_res,subpel_cost);
-  }
-}
-
-void od_mv_est(od_mv_est_ctx *_est,int _ref,int _lambda){
-  ogg_int32_t dcost;
-  int         cost_thresh;
-  int         nhmvbs;
-  int         nvmvbs;
-  nhmvbs=_est->enc->state.nhmbs+1<<2;
-  nvmvbs=_est->enc->state.nvmbs+1<<2;
-  _est->lambda=_lambda;
-  _est->mvapw[_ref][0]=0x20000;
-  _est->mvapw[_ref][1]=0x10000;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-  /*Set some initial state.
-    This would get reset eventually by the algorithm in a more convenient
-     place, but is needed earlier by the visualization.*/
-  if(daala_granule_basetime(&_est->enc->state,_est->enc->state.cur_time)==
-   ANI_FRAME){
-    int vx;
-    int vy;
-    for(vy=0;vy<nvmvbs;vy++){
-      od_mv_grid_pt *grid;
-      grid=_est->enc->state.mv_grid[vy];
-      for(vx=0;vx<nhmvbs;vx++){
-        grid[vx].valid=0;
-        grid[vx].right=0;
-        grid[vx].down=0;
-        grid[vx].mv[0]=0;
-        grid[vx].mv[1]=1;
-      }
-    }
-  }
-#endif
-  od_mv_est_init_mvs(_est,_ref);
-  od_mv_est_decimate(_est,_ref);
-  /*This threshold is somewhat arbitrary.
-    Chen and Willson use 6000 (with SSD as an error metric).
-    We would like something more dependent on the frame size.
-    For CIF, there are a maximum of 6992 vertices in the mesh, which is pretty
-     close to 6000.
-    With a SAD error metric like we use, the square root of 6000 would be a
-     more appropriate value, however that gives a PSNR improvement of less than
-     0.01 dB, and requires almost twice as many iterations to achieve.*/
-  cost_thresh=-nhmvbs*nvmvbs<<OD_LAMBDA_SCALE;
-#if 0
-  /*Logarithmic search.*/
-  do{
-    dcost=0;
-    dcost+=od_mv_est_refine(_est,_ref,5,2,OD_SQUARE_NSITES,OD_SQUARE_SITES);
-    dcost+=od_mv_est_refine(_est,_ref,4,2,OD_SQUARE_NSITES,OD_SQUARE_SITES);
-    dcost+=od_mv_est_refine(_est,_ref,3,2,OD_SQUARE_NSITES,OD_SQUARE_SITES);
-  }
-  while(dcost<cost_thresh);
-#else
-  /*Diamond search.
-    This appears to give the same quality as the logarithmic search, but at
-     nearly 10 times the speed.*/
-  do dcost=od_mv_est_refine(_est,_ref,3,2,OD_DIAMOND_NSITES,OD_DIAMOND_SITES);
-  while(dcost<cost_thresh);
-#endif
-  od_mv_subpel_refine(_est,_ref,cost_thresh);
-}
diff --git a/src/mcenc-nolevel.c b/src/mcenc-nolevel.c
deleted file mode 100644 (file)
index e703a2f..0000000
+++ /dev/null
@@ -1,4414 +0,0 @@
-/*Daala video codec
-Copyright (c) 2006-2010 Daala project contributors.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
-
-#include <stddef.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <limits.h>
-#include <string.h>
-
-/*TODO:
- - Develope a real encoding and measure real bits.
- - Thresholds for DP.
-   + How do we calculate them?
-   + How do they propagate between frames (block sizes change)
-   + Compute rate change of trailing MVs correctly.
- - Compute bits needed for labels during DP (to bias towards using the same
-    label).
- - Allow setting a maximum decimation level.
- - Allow setting a maximum refinement level.*/
-
-/*The frame number to animate.*/
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-#define ANI_FRAME (69)
-#endif
-
-#define OD_MC_USEB (1<<0)
-#define OD_MC_USEV (1<<1)
-
-typedef struct od_mv_node            od_mv_node;
-typedef struct od_mv_dp_state        od_mv_dp_state;
-typedef struct od_mv_dp_node         od_mv_dp_node;
-typedef struct od_mv_err_node        od_mv_err_node;
-
-#include "mc.h"
-#include "encint.h"
-
-typedef int          od_offset[2];
-typedef int          od_pattern[8];
-typedef ogg_uint16_t od_sad4[4];
-
-
-
-/*The state information used by the motion estimation process that is not
-   required by the decoder.
-  Some of this information corresponds to a vertex in the MV mesh.
-  Other pieces correspond to a block whose upper-left corner is located at that
-   vertex.*/
-struct od_mv_node{
-  /*The historical motion vectors for EPZS^2, stored at full-pel resolution.
-    Indexed by [time][reference_type][component].*/
-  int           mvs[3][2][2];
-  /*The current estimated rate of this MV.*/
-  unsigned      mv_rate:16;
-  /*The current estimated rate of the edge labels.*/
-  unsigned      lb_rate:4;
-  /*The number of blocks influenced by this MV who failed their SAD checks.*/
-  unsigned      needs_check:4;
-  /*The current size of the block with this MV at its upper-left.*/
-  unsigned      log_mvb_sz:2;
-  /*The index of the exterior corner of that block.*/
-  unsigned      c:2;
-  /*The edge splitting index of that block.*/
-  unsigned      s:2;
-  /*The current distortion of that block.*/
-  ogg_int32_t   sad;
-  /*The SAD for BMA predictor centered on this node.
-    Used for the dynamic thresholds of the initial EPZS^2 pass.*/
-  ogg_int32_t   bma_sad;
-  /*The location of this node in the grid.
-    Used to retrieve it after pulling it off the decimation heap.*/
-  int           vx;
-  int           vy;
-  /*The change in global distortion for decimating this node.*/
-  ogg_int32_t   dd;
-  /*The change in global rate for decimating this node.*/
-  int           dr;
-  /*The position of this node in the heap.*/
-  int           heapi;
-};
-
-#define OD_DP_NSTATES_MAX     (9)
-#define OD_DP_NBLOCKS_MAX     (8)
-#define OD_DP_NPREDICTED_MAX  (17)
-#define OD_DP_NCHANGEABLE_MAX (6)
-
-/*One of the trellis states in the dynamic prgram.*/
-struct od_mv_dp_state{
-  /*The MV to install for this state.*/
-  int           mv[2];
-  /*The best state in the previous DP node to use with this one, or -1 to
-     indicate the start of the path.*/
-  int           prevsi;
-  /*The total rate change (thus far) produced by choosing this path.*/
-  int           dr;
-  /*The total distortion change (thus far) produced by choosing this path.*/
-  ogg_int32_t   dd;
-  /*The new SAD of each block affected by the the DP between this node and the
-     previous node.
-    These are installed if the path is selected.*/
-  ogg_int32_t   block_sads[OD_DP_NBLOCKS_MAX];
-  /*The new rate of each MV predicted by this node.
-    These are installed if the path is selected.
-    These may supersede the rates reported in previous nodes on the path.*/
-  int           pred_mv_rates[OD_DP_NPREDICTED_MAX];
-  /*The new rate of this MV.*/
-  int           mv_rate;
-};
-
-/*A node on the dynamic programming path.*/
-struct od_mv_dp_node{
-  od_mv_grid_pt  *mvg;
-  od_mv_node     *mv;
-  /*The number of states considered in this node.*/
-  int             nstates;
-  /*The number of blocks affected by states in this node.*/
-  int             nblocks;
-  /*The number of MVs predicted by this node.*/
-  int             npredicted;
-  /*The number of those MVs that are potentially changeable by future DP
-     states.*/
-  int             npred_changeable;
-  /*The original MV used by this node.*/
-  int             original_mv[2];
-  /*The original edge label used b this node.*/
-  unsigned char   original_etype:1;
-  /*The original rate of this MV.*/
-  int             original_mv_rate;
-  /*The original MV rates before predictors were changed by this node.
-    This only includes the ones that are actually changeable.*/
-  int             original_mv_rates[OD_DP_NCHANGEABLE_MAX];
-  /*The last node we save/restore in order to perform prediction.*/
-  od_mv_dp_node  *min_predictor_node;
-  /*The set of trellis states.*/
-  od_mv_dp_state  states[OD_DP_NSTATES_MAX];
-  /*Up to 8 blocks can be influenced by this MV and the previous MV.*/
-  od_mv_node     *blocks[OD_DP_NBLOCKS_MAX];
-  /*The vertices whose MV we predict.*/
-  /*Up to 20 MVs can be predicted by this one, but 3 of those are MVs on the
-     DP trellis whose value we have yet to determine.*/
-  od_mv_grid_pt  *predicted_mvgs[OD_DP_NPREDICTED_MAX];
-  od_mv_node     *predicted_mvs[OD_DP_NPREDICTED_MAX];
-};
-
-struct od_mv_est_ctx{
-  od_enc_ctx      *enc;
-  /*A cache of the SAD values used during decimation.
-    Indexed by [vy>>log_mvb_sz][vx>>log_mvb_sz][log_mvb_sz][s], where s is the
-     edge split state.
-    The SAD of top-level blocks (log_mvb_sz==2) is not stored in this cache,
-     since it is only needed once.*/
-  od_sad4        **sad_cache[2];
-  /*The state of the MV mesh specific to the encoder.*/
-  od_mv_node     **mvs;
-  /*A temporary copy of the decoder-side MV grid used to save-and-restore the
-     MVs when attempting sub-pel refinement.*/
-  od_mv_grid_pt  **refine_grid;
-  /*Space for storing the Viterbi trellis used for DP refinment.*/
-  od_mv_dp_node   *dp_nodes;
-  /*The decimation heap.*/
-  od_mv_node     **dec_heap;
-  /*The number of vertices in the decimation heap.*/
-  int              dec_nheap;
-  /*The number of undecimated vertices in each row.*/
-  unsigned        *row_counts;
-  /*The number of undecimated vertices in each column.*/
-  unsigned        *col_counts;
-  /*The weights used to produce the accelerated MV predictor.*/
-  ogg_int32_t      mvapw[2][2];
-  /*Flags indicating which MVs have already been tested during the initial
-    EPZS^2 pass.*/
-  unsigned char    hit_cache[64][64];
-  /*The flag used by the current EPZS search iteration.*/
-  unsigned         hit_bit;
-  /*The Langrangian multiplier used for R-D optimization.*/
-  int              lambda;
-  /*Configuration.*/
-  /*The flags indicating which feature to use.*/
-  int              flags;
-  /*The smallest resolution to refine MVs to.*/
-  int              mv_res_min;
-};
-
-
-
-/*The subdivision level of a MV in the mesh, given its position (mod 4).*/
-static const int OD_MC_LEVEL[4][4]={
-  {0,4,2,4},
-  {4,3,4,3},
-  {2,4,1,4},
-  {4,3,4,3}
-};
-
-/*Ancestor lists for a vertex.
-  These are stored as lists of offsets to the vertices in the domain.
-  Level 0 ancestors are not included, as they cannot be decimated.*/
-/*Lists for level 2 vertices.*/
-static const od_offset OD_ANCESTORS2[2][2]={
-  {{ 0,-2},{ 0, 2}},
-  {{-2, 0},{ 2, 0}},
-};
-/*Lists for level 3 vertices.*/
-static const od_offset OD_ANCESTORS3[4][5]={
-  {{ 1,-1},{-1, 1},{ 1,-3},{-3, 1},{ 1, 1}},
-  {{-1,-1},{ 1, 1},{-1,-3},{-1, 1},{ 3, 1}},
-  {{-1,-1},{ 1, 1},{-3,-1},{ 1,-1},{ 1, 3}},
-  {{ 1,-1},{-1, 1},{-1,-1},{ 3,-1},{-1, 3}},
-};
-/*Lists for level 4 vertices.*/
-static const od_offset OD_ANCESTORS4[8][9]={
-  {{ 0,-1},{ 0, 1},{-1,-2},{ 1, 0},{-1, 2},{-3,-2},{ 1,-2},{-3, 2},{ 1, 2}},
-  {{ 0,-1},{ 0, 1},{ 1,-2},{-1, 0},{ 1, 2},{-1,-2},{ 3,-2},{-1, 2},{ 3, 2}},
-  {{-1, 0},{ 1, 0},{-2,-1},{ 2,-1},{ 0, 1},{-2,-3},{ 2,-3},{-2, 1},{ 2, 1}},
-  {{-1, 0},{ 1, 0},{ 0,-1},{-2, 1},{ 2, 1},{ 0,-3},{-4, 1},{ 0, 1},{ 4, 1}},
-  {{ 0,-1},{ 0, 1},{ 1,-2},{-1, 0},{ 1, 2},{ 1,-4},{-3, 0},{ 1, 0},{ 1, 4}},
-  {{ 0,-1},{ 0, 1},{-1,-2},{ 1, 0},{-1, 2},{-1,-4},{-1, 0},{ 3, 0},{-1, 4}},
-  {{-1, 0},{ 1, 0},{ 0,-1},{-2, 1},{ 2, 1},{-2,-1},{ 2,-1},{-2, 3},{ 2, 3}},
-  {{-1, 0},{ 1, 0},{-2,-1},{ 2,-1},{ 0, 1},{-4,-1},{ 0,-1},{ 4,-1},{ 0, 3}},
-};
-/*The number of ancestors in each list in the grid pattern.*/
-static const int OD_NANCESTORS[4][4]={
-  {0,9,2,9},
-  {9,5,9,5},
-  {2,9,0,9},
-  {9,5,9,5}
-};
-/*The lists for each vertex in the grid pattern.*/
-static const od_offset *OD_ANCESTORS[4][4]={
-  {NULL,            OD_ANCESTORS4[0],OD_ANCESTORS2[0],OD_ANCESTORS4[1]},
-  {OD_ANCESTORS4[2],OD_ANCESTORS3[0],OD_ANCESTORS4[3],OD_ANCESTORS3[1]},
-  {OD_ANCESTORS2[1],OD_ANCESTORS4[4],NULL            ,OD_ANCESTORS4[5]},
-  {OD_ANCESTORS4[6],OD_ANCESTORS3[2],OD_ANCESTORS4[7],OD_ANCESTORS3[3]}
-};
-
-
-
-/*Computes the SAD of the input image against the given predictor.*/
-static ogg_int32_t od_state_sad8(od_state *_state,const unsigned char *_p,
- int _pystride,int _pxstride,int _x,int _y,int _log_blk_sz){
-  od_img_plane        *iplane;
-  const unsigned char *p;
-  unsigned char       *src;
-  unsigned char       *src0;
-  int                  width;
-  int                  height;
-  int                  i;
-  int                  j;
-  ogg_int32_t          ret;
-  width=height=1<<_log_blk_sz;
-  /*TODO: Use picture dimenstions, not frame dimensions.*/
-  if(_x<0){
-    width+=_x;
-    _p-=_x*_pxstride;
-    _x=0;
-  }
-  if(_y<0){
-    height+=_y;
-    _p-=_y*_pystride;
-    _y=0;
-  }
-  if(_x+width>_state->input.width)width=_state->input.width-_x;
-  if(_y+height>_state->input.height)height=_state->input.height-_y;
-  /*fprintf(stderr,"[%i,%i]x[%i,%i]\n",_x,_y,width,height);*/
-  iplane=_state->input.planes+0;
-  src0=iplane->data+_y*iplane->ystride+_x*iplane->xstride;
-  ret=0;
-  for(j=0;j<height;j++){
-    src=src0;
-    p=_p;
-    for(i=0;i<width;i++){
-      ret+=abs(p[0]-src[0]);
-      src+=iplane->xstride;
-      p+=_pxstride;
-    }
-    src0+=iplane->ystride;
-    _p+=_pystride;
-  }
-  return ret;
-}
-
-
-
-static void od_mv_est_init(od_mv_est_ctx *_est,od_enc_ctx *_enc){
-  int nhmvbs;
-  int nvmvbs;
-  int vx;
-  int vy;
-  _est->enc=_enc;
-  nhmvbs=_enc->state.nhmbs+1<<2;
-  nvmvbs=_enc->state.nvmbs+1<<2;
-  _est->sad_cache[1]=(od_sad4 **)od_malloc_2d(nvmvbs>>1,nhmvbs>>1,
-   sizeof(_est->sad_cache[1][0][0]));
-  _est->sad_cache[0]=(od_sad4 **)od_malloc_2d(nvmvbs,nhmvbs,
-   sizeof(_est->sad_cache[1][0][0]));
-  _est->mvs=(od_mv_node **)od_calloc_2d(nvmvbs+1,nhmvbs+1,
-   sizeof(_est->mvs[0][0]));
-  _est->refine_grid=(od_mv_grid_pt **)od_malloc_2d(nvmvbs+1,nhmvbs+1,
-   sizeof(_est->refine_grid[0][0]));
-  _est->dp_nodes=(od_mv_dp_node *)_ogg_malloc(
-   sizeof(od_mv_dp_node)*(OD_MAXI(nhmvbs,nvmvbs)+1));
-  _est->row_counts=(unsigned *)_ogg_malloc(sizeof(unsigned)*(nvmvbs+1));
-  _est->col_counts=(unsigned *)_ogg_malloc(sizeof(unsigned)*(nhmvbs+1));
-  for(vy=0;vy<=nvmvbs;vy++)for(vx=0;vx<=nhmvbs;vx++){
-    _est->mvs[vy][vx].vx=vx;
-    _est->mvs[vy][vx].vy=vy;
-    _est->mvs[vy][vx].heapi=-1;
-    _enc->state.mv_grid[vy][vx].valid=1;
-  }
-  _est->dec_heap=(od_mv_node **)_ogg_malloc(
-   (nvmvbs+1)*(nhmvbs+1)*sizeof(_est->dec_heap[0]));
-  _est->hit_bit=0;
-  /*TODO: Allow configuration.*/
-  _est->mv_res_min=0;
-  _est->flags=OD_MC_USEB|OD_MC_USEV;
-}
-
-static void od_mv_est_clear(od_mv_est_ctx *_est){
-  _ogg_free(_est->dec_heap);
-  _ogg_free(_est->col_counts);
-  _ogg_free(_est->row_counts);
-  _ogg_free(_est->dp_nodes);
-  od_free_2d(_est->refine_grid);
-  od_free_2d(_est->mvs);
-  od_free_2d(_est->sad_cache[0]);
-  od_free_2d(_est->sad_cache[1]);
-}
-
-
-
-/*STAGE 1: INITIAL MV ESTIMATES (via EPZS^2).*/
-
-
-
-/*The maximum luma plane SAD value for accepting set A predictors.*/
-static const int OD_YSAD_THRESH1[3]={16,64,256};
-/*The amount to right shift the minimum error by when inflating it for
-   computing the second maximum luma plane SAD threshold.*/
-#define OD_YSAD_THRESH2_SCALE_BITS (3)
-/*The amount to add to the second maximum luma plane threshold when inflating
-   it.*/
-static const int OD_YSAD_THRESH2_OFFS[3]={8,32,128};
-
-/*The vector offsets in the X direction for each search site in the square
-   pattern.*/
-static const int OD_SQUARE_DX[9]={-1,0,1,-1,0,1,-1,0,1};
-/*The vector offsets in the Y direction for each search site in the square
-   pattern.*/
-static const int OD_SQUARE_DY[9]={-1,-1,-1,0,0,0,1,1,1};
-
-/*The number of sites to search of each boundary condition in the square
-   pattern.
-  Bit flags for the boundary conditions are as follows:
-  1: -32==dx
-  2:      dx==31
-  4: -32==dy
-  8:      dy==31*/
-static const int OD_SQUARE_NSITES[11]={8,5,5,0,5,3,3,0,5,3,3};
-/*The list of sites to search for each boudnary condition in the square
-   pattern.*/
-static const od_pattern OD_SQUARE_SITES[11]={
-  /* -32<dx<31,   -32<dy<31*/
-  {0,1,2,3,5,6,7,8},
-  /*-32==dx,      -32<dy<31*/
-  {1,2,5,7,8},
-  /*     dx==31,  -32<dy<31*/
-  {0,1,3,6,7},
-  /*-32==dx==31,  -32<dy<31*/
-  {-1},
-  /* -32<dx<31,  -32==dy*/
-  {3,5,6,7,8},
-  /*-32==dx,     -32==dy*/
-  {5,7,8},
-  /*     dx==31, -32==dy*/
-  {3,6,7},
-  /*-32==dx==31, -32==dy*/
-  {-1},
-  /* -32<dx<31,       dy==31*/
-  {0,1,2,3,5},
-  /*-32==dx,          dy==31*/
-  {1,2,5},
-  /*     dx==31,      dy==31*/
-  {0,1,3}
-};
-
-/*The number of sites to search of each boundary condition in the diamond
-   pattern.
-  Bit flags for the boundary conditions are as follows:
-  1: -32==dx
-  2:      dx==31
-  4: -32==dy
-  8:      dy==31*/
-static const int OD_DIAMOND_NSITES[11]={4,3,3,0,3,2,2,0,3,2,2};
-/*The list of sites to search for each boudnary condition in the square
-   pattern.*/
-static const od_pattern OD_DIAMOND_SITES[11]={
-  /* -32<dx<31,   -32<dy<31*/
-  {1,3,5,7},
-  /*-32==dx,      -32<dy<31*/
-  {1,5,7},
-  /*     dx==31,  -32<dy<31*/
-  {1,3,7},
-  /*-32==dx==31,  -32<dy<31*/
-  {-1},
-  /* -32<dx<31,  -32==dy*/
-  {3,5,7},
-  /*-32==dx,     -32==dy*/
-  {5,7},
-  /*     dx==31, -32==dy*/
-  {3,7},
-  /*-32==dx==31, -32==dy*/
-  {-1},
-  /* -32<dx<31,       dy==31*/
-  {1,3,5},
-  /*-32==dx,          dy==31*/
-  {1,5},
-  /*     dx==31,      dy==31*/
-  {1,3}
-};
-
-
-
-/*Clear the cache of motion vectors we've examined.*/
-static void od_mv_est_clear_hit_cache(od_mv_est_ctx *_est){
-  if(_est->hit_bit++==0)memset(_est->hit_cache,0,sizeof(_est->hit_cache));
-  else _est->hit_bit&=UCHAR_MAX;
-}
-
-/*Test if a motion vector has been examined.*/
-static int od_mv_est_is_hit(od_mv_est_ctx *_est,int _mvx,int _mvy){
-  return _est->hit_cache[_mvy+32][_mvx+32]==_est->hit_bit;
-}
-
-/*Mark a motion vector examined.*/
-static void od_mv_est_set_hit(od_mv_est_ctx *_est,int _mvx,int _mvy){
-  _est->hit_cache[_mvy+32][_mvx+32]=(unsigned char)_est->hit_bit;
-}
-
-/*Gets the predictor for a given MV node at the given MV resolution.*/
-static void od_state_get_predictor(od_state *_state,int _pred[2],
- int _vx,int _vy,int _level,int _mv_res){
-  int nhmvbs;
-  int nvmvbs;
-  nhmvbs=_state->nhmbs+1<<2;
-  nvmvbs=_state->nvmbs+1<<2;
-  if(_vx<2||_vy<2||_vx>nhmvbs-2||_vy>nvmvbs-2)_pred[0]=_pred[1]=0;
-  else{
-    od_mv_grid_pt *cneighbors[4];
-    int            a[4][2];
-    int            mvb_sz;
-    int            ncns;
-    int            ci;
-    mvb_sz=1<<(4-_level>>1);
-    ncns=4;
-    if(_level==0){
-      cneighbors[0]=_state->mv_grid[_vy-4]+_vx-4;
-      cneighbors[1]=_state->mv_grid[_vy-4]+_vx;
-      cneighbors[2]=_state->mv_grid[_vy-4]+_vx+4;
-      cneighbors[3]=_state->mv_grid[_vy]+_vx-4;
-    }
-    else{
-      if(_level&1){
-        cneighbors[0]=_state->mv_grid[_vy-mvb_sz]+_vx-mvb_sz;
-        cneighbors[1]=_state->mv_grid[_vy-mvb_sz]+_vx+mvb_sz;
-        cneighbors[2]=_state->mv_grid[_vy+mvb_sz]+_vx-mvb_sz;
-        cneighbors[3]=_state->mv_grid[_vy+mvb_sz]+_vx+mvb_sz;
-      }
-      else{
-        cneighbors[0]=_state->mv_grid[_vy-mvb_sz]+_vx;
-        cneighbors[1]=_state->mv_grid[_vy]+_vx-mvb_sz;
-        /*NOTE: Only one of these candidatss can be excluded at a time, so
-           there will always be at least 3.*/
-        if(_vx+mvb_sz>_vx+4&~3)ncns--;
-        else cneighbors[2]=_state->mv_grid[_vy]+_vx+mvb_sz;
-        if(_vy+mvb_sz>_vy+4&~3)ncns--;
-        else cneighbors[ncns-1]=_state->mv_grid[_vy+mvb_sz]+_vx;
-      }
-    }
-    for(ci=0;ci<ncns;ci++){
-      a[ci][0]=cneighbors[ci]->mv[0];
-      a[ci][1]=cneighbors[ci]->mv[1];
-    }
-    /*Median-of-4.*/
-    if(ncns>3){
-      /*fprintf(stderr,"Median of 4:\n");
-      fprintf(stderr,"(%i,%i) (%i,%i) (%i,%i) (%i,%i)\n",
-       a[0][0],a[0][1],a[1][0],a[1][1],a[2][0],a[2][1],a[3][0],a[3][1]);*/
-/*
-Sorting network for 4 elements:
-0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
-0001 0010 0011 0100 0101 0110 0111 1001 1010 1011 1101
-0:1
-0010 0010 0011 0100 0110 0110 0111 1010 1010 1011 1110
-0010 0011 0100 0110 0111 1010 1011
-2:3
-0010 0011 1000 1010 1011 1100 1110
-0010 0011 1010 1011
-0:2
-0010 0110 1010 1110
-0010 0110 1010
-1:3
-1000 1100 1010
-1010
-This last compare is unneeded for a median:
-1:2
-1100
-*/
-      OD_SORT2I(a[0][0],a[1][0]);
-      OD_SORT2I(a[0][1],a[1][1]);
-      OD_SORT2I(a[2][0],a[3][0]);
-      OD_SORT2I(a[2][1],a[3][1]);
-      OD_SORT2I(a[0][0],a[2][0]);
-      OD_SORT2I(a[0][1],a[2][1]);
-      OD_SORT2I(a[1][0],a[3][0]);
-      OD_SORT2I(a[1][1],a[3][1]);
-      /*fprintf(stderr,"(%i,%i) (%i,%i) (%i,%i) (%i,%i)\n",
-       a[0][0],a[0][1],a[1][0],a[1][1],a[2][0],a[2][1],a[3][0],a[3][1]);*/
-      _pred[0]=OD_DIV_POW2_RE(a[1][0]+a[2][0],_mv_res+1);
-      _pred[1]=OD_DIV_POW2_RE(a[1][1]+a[2][1],_mv_res+1);
-    }
-    /*Median-of-3.*/
-    else{
-      /*fprintf(stderr,"Median of 3:\n");
-      fprintf(stderr,"(%i,%i) (%i,%i) (%i,%i)\n",
-       a[0][0],a[0][1],a[1][0],a[1][1],a[2][0],a[2][1]);*/
-      OD_SORT2I(a[0][0],a[1][0]);
-      OD_SORT2I(a[0][1],a[1][1]);
-      OD_SORT2I(a[1][0],a[2][0]);
-      OD_SORT2I(a[1][1],a[2][1]);
-      OD_SORT2I(a[0][0],a[1][0]);
-      OD_SORT2I(a[0][1],a[1][1]);
-      /*fprintf(stderr,"(%i,%i) (%i,%i) (%i,%i)\n",
-       a[0][0],a[0][1],a[1][0],a[1][1],a[2][0],a[2][1]);*/
-      _pred[0]=OD_DIV_POW2_RE(a[1][0],_mv_res);
-      _pred[1]=OD_DIV_POW2_RE(a[1][1],_mv_res);
-    }
-  }
-}
-
-/*Estimate the number of bits that will be used to encode the given MV.
-  The predictor must already have been subtracted off.*/
-static int od_mv_est_bits(int _dx,int _dy){
-  return od_ilog(abs(_dx))+(_dx!=0)+od_ilog(abs(_dy))+(_dy!=0)+2;
-}
-
-/*Checks to make sure our current mv_rate and sad values are correct.
-  This is used for debugging only.*/
-void od_mv_est_check_rd_block_state(od_mv_est_ctx *_est,int _ref,
- int _vx,int _vy,int _log_mvb_sz){
-  od_state      *state;
-  int half_mvb_sz;
-  state=&_est->enc->state;
-  half_mvb_sz=1<<_log_mvb_sz-1;
-  if(_log_mvb_sz>0&&state->mv_grid[_vy+half_mvb_sz][_vx+half_mvb_sz].valid){
-    od_mv_est_check_rd_block_state(_est,_ref,_vx,_vy,_log_mvb_sz-1);
-    od_mv_est_check_rd_block_state(_est,_ref,
-     _vx+half_mvb_sz,_vy,_log_mvb_sz-1);
-    od_mv_est_check_rd_block_state(_est,_ref,
-     _vx,_vy+half_mvb_sz,_log_mvb_sz-1);
-    od_mv_est_check_rd_block_state(_est,_ref,
-     _vx+half_mvb_sz,_vy+half_mvb_sz,_log_mvb_sz-1);
-  }
-  else{
-    unsigned char  pred[16][16];
-    od_mv_node    *block;
-    ogg_int32_t    sad;
-    int            c;
-    int            s;
-    block=_est->mvs[_vy]+_vx;
-    if(block->log_mvb_sz!=_log_mvb_sz){
-      fprintf(stderr,
-       "Failure at node (%i,%i): log_mvb_sz should be %i (is %i)\n",
-       _vx,_vy,_log_mvb_sz,block->log_mvb_sz);
-    }
-    if(_log_mvb_sz<2){
-      int mask;
-      mask=(1<<_log_mvb_sz+1)-1;
-      c=!!(_vx&mask);
-      if(_vy&mask)c=3-c;
-      if(block->c!=c){
-        fprintf(stderr,"Failure at node (%i,%i): c should be %i (is %i)\n",
-         _vx,_vy,c,block->c);
-      }
-      s=state->mv_grid[_vy+(OD_VERT_DY[c+1&3]<<_log_mvb_sz)][
-       _vx+(OD_VERT_DX[c+1&3]<<_log_mvb_sz)].valid|
-       state->mv_grid[_vy+(OD_VERT_DY[c+3&3]<<_log_mvb_sz)][
-       _vx+(OD_VERT_DX[c+3&3]<<_log_mvb_sz)].valid<<1;
-    }
-    else{
-      c=0;
-      s=3;
-    }
-    if(block->s!=s){
-      fprintf(stderr,"Failure at node (%i,%i): s should be %i (is %i)\n",
-       _vx,_vy,s,block->s);
-    }
-    od_state_pred_block_from_setup(state,pred[0],sizeof(pred[0]),_ref,0,
-     _vx,_vy,c,s,_log_mvb_sz);
-    sad=od_state_sad8(state,pred[0],sizeof(pred[0]),1,
-     _vx-2<<2,_vy-2<<2,_log_mvb_sz+2);
-    if(block->sad!=sad){
-      fprintf(stderr,"Failure at node (%i,%i): sad should be %i (is %i)\n",
-       _vx,_vy,sad,block->sad);
-    }
-  }
-}
-
-/*Checks to make sure our current mv_rate and sad values are correct.
-  This is used for debugging only.*/
-void od_mv_est_check_rd_state(od_mv_est_ctx *_est,int _ref,int _mv_res){
-  od_state *state;
-  int       nhmvbs;
-  int       nvmvbs;
-  int       vx;
-  int       vy;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  for(vy=0;vy<nvmvbs;vy+=4){
-    for(vx=0;vx<nhmvbs;vx+=4){
-      od_mv_est_check_rd_block_state(_est,_ref,vx,vy,2);
-    }
-  }
-  for(vy=0;vy<nvmvbs;vy++)for(vx=0;vx<nhmvbs;vx++){
-    od_mv_grid_pt *mvg;
-    od_mv_node    *mv;
-    int            pred[2];
-    int            mv_rate;
-    mvg=state->mv_grid[vy]+vx;
-    if(!mvg->valid)continue;
-    mv=_est->mvs[vy]+vx;
-    if(vx>=2&&vx<=nhmvbs-2&&vy>=2&&vy<=nvmvbs-2){
-      od_state_get_predictor(state,pred,vx,vy,OD_MC_LEVEL[vy&3][vx&3],_mv_res);
-      mv_rate=od_mv_est_bits(
-       (mvg->mv[0]>>_mv_res)-pred[0],(mvg->mv[1]>>_mv_res)-pred[1]);
-    }
-    else pred[0]=pred[1]=mv_rate=0;
-    if(mv_rate!=mv->mv_rate){
-      fprintf(stderr,"Failure at node (%i,%i): mv_rate should be %i (is %i)\n",
-       vx,vy,mv_rate,mv->mv_rate);
-      fprintf(stderr,"Predictor was: (%i,%i)   MV was: (%i,%i)\n",
-       pred[0],pred[1],mvg->mv[0]>>_mv_res,mvg->mv[1]>>_mv_res);
-    }
-  }
-}
-
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-static const unsigned char OD_YCbCr_MVCAND[3]={210, 16,214};
-#endif
-
-static void od_mv_est_init_mv(od_mv_est_ctx *_est,int _ref,int _vx,int _vy){
-  od_state      *state;
-  od_img_plane  *iplane;
-  od_mv_grid_pt *mvg;
-  od_mv_node    *mv;
-  od_mv_node    *cneighbors[4];
-  od_mv_node    *pneighbors[4];
-  ogg_int32_t    t2;
-  ogg_int32_t    best_sad;
-  ogg_int32_t    best_cost;
-  int            best_rate;
-  int            cands[6][2];
-  int            best_vec[2];
-  int            a[4][2];
-  int            refi;
-  int            level;
-  int            log_mvb_sz;
-  int            mvb_sz;
-  int            bx;
-  int            by;
-  int            ncns;
-  int            mvxmin;
-  int            mvxmax;
-  int            mvymin;
-  int            mvymax;
-  int            candx;
-  int            candy;
-  int            predx;
-  int            predy;
-  int            ci;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-  int            x0;
-  int            y0;
-#endif
-  /*fprintf(stderr,"Initial search for MV (%i,%i):\n",_vx,_vy);*/
-  state=&_est->enc->state;
-  refi=state->ref_imgi[_ref];
-  iplane=state->ref_imgs[refi].planes+0;
-  mv=_est->mvs[_vy]+_vx;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1;
-  mvb_sz=1<<log_mvb_sz;
-  mvg=state->mv_grid[_vy]+_vx;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-  if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-    od_state_mc_predict(state,_ref);
-    od_state_fill_vis(state);
-    x0=(_vx-2<<3)+(OD_UMV_PADDING<<1);
-    y0=(_vy-2<<3)+(OD_UMV_PADDING<<1);
-  }
-#endif
-  /*fprintf(stderr,"Level %i (%ix%i block)\n",level,mvb_sz<<2,mvb_sz<<2);*/
-  bx=_vx-2<<2;
-  by=_vy-2<<2;
-  mvxmin=OD_MAXI(bx-(mvb_sz<<2)-32,-16)-(bx-(mvb_sz<<2));
-  mvxmax=OD_MINI(bx+(mvb_sz<<2)+32,state->info.frame_width+16)-
-   (bx+(mvb_sz<<2))-1;
-  mvymin=OD_MAXI(by-(mvb_sz<<2)-32,-16)-(by-(mvb_sz<<2));
-  mvymax=OD_MINI(by+(mvb_sz<<2)+32,state->info.frame_height+16)-
-   (by+(mvb_sz<<2))-1;
-  /*fprintf(stderr,"(%i,%i): Search range: [%i,%i]x[%i,%i]\n",
-   bx,by,mvxmin,mvymin,mvxmax,mvymax);*/
-  bx-=mvb_sz<<1;
-  by-=mvb_sz<<1;
-  ncns=4;
-  if(level==0){
-    cneighbors[0]=_est->mvs[_vy-4]+_vx-4;
-    cneighbors[1]=_est->mvs[_vy-4]+_vx;
-    cneighbors[2]=_est->mvs[_vy-4]+_vx+4;
-    cneighbors[3]=_est->mvs[_vy]+_vx-4;
-    pneighbors[0]=_est->mvs[_vy-4]+_vx;
-    pneighbors[1]=_est->mvs[_vy]+_vx-4;
-    pneighbors[2]=_est->mvs[_vy]+_vx+4;
-    pneighbors[3]=_est->mvs[_vy+4]+_vx;
-  }
-  else{
-    if(level&1){
-      pneighbors[0]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-      pneighbors[1]=_est->mvs[_vy-mvb_sz]+_vx+mvb_sz;
-      pneighbors[2]=_est->mvs[_vy+mvb_sz]+_vx-mvb_sz;
-      pneighbors[3]=_est->mvs[_vy+mvb_sz]+_vx+mvb_sz;
-      memcpy(cneighbors,pneighbors,sizeof(cneighbors));
-    }
-    else{
-      pneighbors[0]=_est->mvs[_vy-mvb_sz]+_vx;
-      pneighbors[1]=_est->mvs[_vy]+_vx-mvb_sz;
-      pneighbors[2]=_est->mvs[_vy]+_vx+mvb_sz;
-      pneighbors[3]=_est->mvs[_vy+mvb_sz]+_vx;
-      cneighbors[0]=pneighbors[0];
-      cneighbors[1]=pneighbors[1];
-      /*NOTE: Only one of these candidatss can be excluded at a time, so
-         there will always be at least 3.*/
-      if(_vx+mvb_sz>_vx+4&~3)ncns--;
-      else cneighbors[2]=pneighbors[2];
-      if(_vy+mvb_sz>_vy+4&~3)ncns--;
-      else cneighbors[ncns-1]=pneighbors[3];
-    }
-  }
-  /*Spatially correlated predictors (from the current frame):*/
-  for(ci=0;ci<ncns;ci++){
-    a[ci][0]=cneighbors[ci]->mvs[0][_ref][0];
-    a[ci][1]=cneighbors[ci]->mvs[0][_ref][1];
-    cands[ci][0]=OD_CLAMPI(mvxmin,a[ci][0],mvxmax);
-    cands[ci][1]=OD_CLAMPI(mvymin,a[ci][1],mvymax);
-  }
-  /*Compute the median predictor:*/
-  if(ncns>3){
-    /*Median-of-4.*/
-    OD_SORT2I(a[0][0],a[1][0]);
-    OD_SORT2I(a[0][1],a[1][1]);
-    OD_SORT2I(a[2][0],a[3][0]);
-    OD_SORT2I(a[2][1],a[3][1]);
-    OD_SORT2I(a[0][0],a[2][0]);
-    OD_SORT2I(a[0][1],a[2][1]);
-    OD_SORT2I(a[1][0],a[3][0]);
-    OD_SORT2I(a[1][1],a[3][1]);
-    predx=a[1][0]+a[2][0];
-    predy=a[1][1]+a[2][1];
-    candx=OD_CLAMPI(mvxmin,OD_DIV2(predx),mvxmax);
-    candy=OD_CLAMPI(mvymin,OD_DIV2(predy),mvymax);
-  }
-  else{
-    /*Median-of-3.*/
-    OD_SORT2I(a[0][0],a[1][0]);
-    OD_SORT2I(a[0][1],a[1][1]);
-    OD_SORT2I(a[1][0],a[2][0]);
-    OD_SORT2I(a[1][1],a[2][1]);
-    OD_SORT2I(a[0][0],a[1][0]);
-    OD_SORT2I(a[0][1],a[1][1]);
-    predx=a[1][0]<<1;
-    predy=a[1][1]<<1;
-    candx=OD_CLAMPI(mvxmin,a[1][0],mvxmax);
-    candy=OD_CLAMPI(mvymin,a[1][1],mvymax);
-  }
-  od_mv_est_clear_hit_cache(_est);
-  /*fprintf(stderr,"%p (%i,%i)\n",iplane->data,bx+candx,by+candy);*/
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-  if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-    od_img_draw_line(&state->vis_img,x0,y0,x0+(candx<<1),y0+(candy<<1),
-     OD_YCbCr_MVCAND);
-  }
-#endif
-  best_sad=od_state_sad8(state,
-   iplane->data+(by+candy)*(iplane->ystride<<1)+(bx+candx<<1),
-   iplane->ystride<<1,2,bx,by,log_mvb_sz+2);
-  best_rate=od_mv_est_bits((candx<<1)-predx,(candy<<1)-predy);
-  best_cost=(best_sad<<OD_LAMBDA_SCALE)+best_rate*_est->lambda;
-  /*fprintf(stderr,"Median predictor: (%i,%i)   Error: %i\n",candx,candy,best_err);*/
-  od_mv_est_set_hit(_est,candx,candy);
-  best_vec[0]=candx;
-  best_vec[1]=candy;
-  /*fprintf(stderr,"Threshold: %i\n",OD_YSAD_THRESH1[log_mvb_sz]);*/
-  if(best_sad>OD_YSAD_THRESH1[log_mvb_sz]){
-    ogg_int32_t sad;
-    ogg_int32_t cost;
-    int         rate;
-    /*Compute the early termination threshold for set B.*/
-    t2=mv->bma_sad;
-    for(ci=0;ci<ncns;ci++){
-      int log_cnb_sz;
-      log_cnb_sz=4-OD_MC_LEVEL[cneighbors[ci]->vy&3][cneighbors[ci]->vx&3]>>1;
-      t2=OD_MINI(t2,cneighbors[ci]->bma_sad>>(log_cnb_sz-log_mvb_sz<<1));
-    }
-    t2=t2+(t2>>OD_YSAD_THRESH2_SCALE_BITS)+OD_YSAD_THRESH2_OFFS[log_mvb_sz];
-    /*Constant velocity predictor:*/
-    cands[ncns][0]=OD_CLAMPI(mvxmin,OD_DIV8(mv->mvs[1][_ref][0]),mvxmax);
-    cands[ncns][1]=OD_CLAMPI(mvymin,OD_DIV8(mv->mvs[1][_ref][1]),mvymax);
-    ncns++;
-    /*Zero predictor.*/
-    cands[ncns][0]=0;
-    cands[ncns][1]=0;
-    ncns++;
-    /*Examine the candidates in Set B.*/
-    for(ci=0;ci<ncns;ci++){
-      candx=cands[ci][0];
-      candy=cands[ci][1];
-      /*fprintf(stderr,"Set B predictor %i: (%i,%i) ",ci,candx,candy);*/
-      if(od_mv_est_is_hit(_est,candx,candy)){/*fprintf(stderr,"...Skipping.\n");*/continue;}
-      od_mv_est_set_hit(_est,candx,candy);
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-      if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-        od_img_draw_line(&state->vis_img,x0,y0,x0+(candx<<1),y0+(candy<<1),
-         OD_YCbCr_MVCAND);
-      }
-#endif
-      sad=od_state_sad8(state,
-       iplane->data+(by+candy)*(iplane->ystride<<1)+(bx+candx<<1),
-       iplane->ystride<<1,2,bx,by,log_mvb_sz+2);
-      rate=od_mv_est_bits((candx<<1)-predx,(candy<<1)-predy);
-      cost=(sad<<OD_LAMBDA_SCALE)+rate*_est->lambda;
-      /*fprintf(stderr,"   Error: %i\n",err);*/
-      if(cost<best_cost){
-        best_sad=sad;
-        best_rate=rate;
-        best_cost=cost;
-        best_vec[0]=candx;
-        best_vec[1]=candy;
-      }
-    }
-    /*fprintf(stderr,"Threshold: %i\n",t2);*/
-    if(best_sad>t2){
-      /*Constant velocity predictors from the previous frame:*/
-      for(ci=0;ci<4;ci++){
-        cands[ci][0]=
-         OD_CLAMPI(mvxmin,OD_DIV8(pneighbors[ci]->mvs[1][_ref][0]),mvxmax);
-        cands[ci][1]=
-         OD_CLAMPI(mvymin,OD_DIV8(pneighbors[ci]->mvs[1][_ref][1]),mvymax);
-      }
-      /*The constant acceleration predictor:*/
-      cands[4][0]=OD_CLAMPI(mvxmin,OD_DIV_ROUND_POW2(
-       mv->mvs[1][_ref][0]*_est->mvapw[_ref][0]-
-       mv->mvs[2][_ref][0]*_est->mvapw[_ref][1],16,0x8000),mvxmax);
-      cands[4][1]=OD_CLAMPI(mvymin,OD_DIV_ROUND_POW2(
-       mv->mvs[1][_ref][1]*_est->mvapw[_ref][0]-
-       mv->mvs[2][_ref][1]*_est->mvapw[_ref][1],16,0x8000),mvymax);
-      /*Examine the candidates in Set C.*/
-      for(ci=0;ci<5;ci++){
-        candx=cands[ci][0];
-        candy=cands[ci][1];
-        /*fprintf(stderr,"Set C predictor %i: (%i,%i) ",ci,candx,candy);*/
-        if(od_mv_est_is_hit(_est,candx,candy)){/*fprintf(stderr,"...Skipping.\n");*/continue;}
-        /*if(od_mv_est_is_hit(_est,candx,candy))continue;*/
-        od_mv_est_set_hit(_est,candx,candy);
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-        if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-          od_img_draw_line(&state->vis_img,x0,y0,x0+(candx<<1),y0+(candy<<1),
-           OD_YCbCr_MVCAND);
-        }
-#endif
-        sad=od_state_sad8(state,
-         iplane->data+(by+candy)*(iplane->ystride<<1)+(bx+candx<<1),
-         iplane->ystride<<1,2,bx,by,log_mvb_sz+2);
-        rate=od_mv_est_bits((candx<<1)-predx,(candy<<1)-predy);
-        cost=(sad<<OD_LAMBDA_SCALE)+rate*_est->lambda;
-        /*fprintf(stderr,"   Error: %i\n",err);*/
-        if(cost<best_cost){
-          best_sad=sad;
-          best_rate=rate;
-          best_cost=cost;
-          best_vec[0]=candx;
-          best_vec[1]=candy;
-        }
-      }
-      /*Use the same threshold for Set C as in Set B.*/
-      /*fprintf(stderr,"Threshold: %i\n",t2);*/
-      if(best_sad>t2){
-        int best_site;
-        int nsites;
-        int sitei;
-        int site;
-        int b;
-        /*Square pattern search.*/
-        for(;;){
-          best_site=4;
-          b=(best_vec[0]<=mvxmin)|(best_vec[0]>=mvxmax)<<1|
-           (best_vec[1]<=mvymin)<<2|(best_vec[1]>=mvymax)<<3;
-          nsites=OD_SQUARE_NSITES[b];
-          for(sitei=0;sitei<nsites;sitei++){
-            site=OD_SQUARE_SITES[b][sitei];
-            candx=best_vec[0]+OD_SQUARE_DX[site];
-            candy=best_vec[1]+OD_SQUARE_DY[site];
-            /*fprintf(stderr,"Square search %i: (%i,%i) ",site,candx,candy);*/
-            if(od_mv_est_is_hit(_est,candx,candy)){/*fprintf(stderr,"...Skipping.\n");*/continue;}
-            od_mv_est_set_hit(_est,candx,candy);
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-            if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-              od_img_draw_line(&state->vis_img,x0,y0,
-               x0+(candx<<1),y0+(candy<<1),OD_YCbCr_MVCAND);
-            }
-#endif
-            sad=od_state_sad8(state,
-             iplane->data+(by+candy)*(iplane->ystride<<1)+(bx+candx<<1),
-             iplane->ystride<<1,2,bx,by,log_mvb_sz+2);
-            rate=od_mv_est_bits((candx<<1)-predx,(candy<<1)-predy);
-            cost=(sad<<OD_LAMBDA_SCALE)+rate*_est->lambda;
-            /*fprintf(stderr,"   Error: %i\n",err);*/
-            if(cost<best_cost){
-              best_sad=sad;
-              best_rate=rate;
-              best_cost=cost;
-              best_site=site;
-            }
-          }
-          if(best_site==4)break;
-          best_vec[0]+=OD_SQUARE_DX[best_site];
-          best_vec[1]+=OD_SQUARE_DY[best_site];
-        }
-      }
-    }
-  }
-  /*fprintf(stderr,"Finished. Best vector: (%i,%i)  Best error %i\n",
-   best_vec[0],best_vec[1],best_err);*/
-  mv->mvs[0][_ref][0]=best_vec[0];
-  mv->mvs[0][_ref][1]=best_vec[1];
-  mvg->mv[0]=best_vec[0]<<3;
-  mvg->mv[1]=best_vec[1]<<3;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-  if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-    char             iter_label[16];
-    const od_offset *anc;
-    od_mv_grid_pt   *amvg;
-    int              nanc;
-    int              ai;
-    int              ax;
-    int              ay;
-    mvg->valid=1;
-    nanc=OD_NANCESTORS[_vy&3][_vx&3];
-    anc=OD_ANCESTORS[_vy&3][_vx&3];
-    for(ai=0;ai<nanc;ai++){
-      ax=_vx+anc[ai][0];
-      if(ax<0||ax>(state->nhmbs+1<<2))continue;
-      ay=_vy+anc[ai][1];
-      if(ay<0||ay>(state->nvmbs+1<<2))continue;
-      amvg=state->mv_grid[ay]+ax;
-      amvg->valid=1;
-    }
-    sprintf(iter_label,"ani%08i",state->ani_iter++);
-    od_state_dump_img(state,&state->vis_img,iter_label);
-  }
-#endif
-  mv->bma_sad=best_sad;
-  mv->mv_rate=best_rate;
-  /*od_state_get_predictor(state,a[0],_vx,_vy,level,2);
-  if(a[0][0]!=predx||a[0][1]!=predy){
-    fprintf(stderr,"Failure in MV predictor init: (%i,%i)!=(%i,%i)\n",
-     a[0][0],a[0][1],predx,predy);
-  }
-  mv->mv_rate=od_mv_est_bits((mvg->mv[0]>>2)-a[0][0],(mvg->mv[1]>>2)-a[0][1]);
-  if(mv->mv_rate!=best_rate){
-    fprintf(stderr,"Failure in MV rate init: %i!=%i\n",mv->mv_rate,best_rate);
-  }*/
-}
-
-static void od_mv_est_init_mvs(od_mv_est_ctx *_est,int _ref){
-  od_state *state;
-  int       nhmvbs;
-  int       nvmvbs;
-  int       vx;
-  int       vy;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  /*TODO: Initialize a MVB at a time, for better cache coherency.*/
-  /*Move the motion vector predictors back a frame.*/
-  for(vy=2;vy<=nvmvbs-2;vy++)for(vx=2;vx<=nhmvbs-2;vx++){
-    od_mv_node *mv;
-    mv=_est->mvs[vy]+vx;
-    memmove(mv->mvs+1,mv->mvs+0,sizeof(mv->mvs[0])<<1);
-  }
-  /*Level 0 vertices.*/
-  for(vy=4;vy<nvmvbs;vy+=4){
-    for(vx=4;vx<nhmvbs;vx+=4)od_mv_est_init_mv(_est,_ref,vx,vy);
-  }
-  /*Level 1 vertices.*/
-  for(vy=2;vy<nvmvbs;vy+=4){
-    for(vx=2;vx<nhmvbs;vx+=4)od_mv_est_init_mv(_est,_ref,vx,vy);
-  }
-  /*Level 2 vertices.*/
-  for(vy=2;;vy+=2){
-    for(vx=4;vx<nhmvbs;vx+=4)od_mv_est_init_mv(_est,_ref,vx,vy);
-    vy+=2;
-    if(vy>=nvmvbs)break;
-    for(vx=2;vx<nhmvbs;vx+=4)od_mv_est_init_mv(_est,_ref,vx,vy);
-  }
-  /*Level 3 vertices.*/
-  for(vy=3;vy<nvmvbs-1;vy+=2){
-    for(vx=3;vx<nhmvbs-1;vx+=2)od_mv_est_init_mv(_est,_ref,vx,vy);
-  }
-  /*Level 4 vertices.*/
-  for(vy=2;;vy++){
-    for(vx=3;vx<nhmvbs-1;vx+=2)od_mv_est_init_mv(_est,_ref,vx,vy);
-    vy++;
-    if(vy>=nvmvbs-1)break;
-    for(vx=2;vx<nhmvbs;vx+=2)od_mv_est_init_mv(_est,_ref,vx,vy);
-  }
-}
-
-
-
-/*STAGE 2: DECIMATION.*/
-
-
-
-/*Merging domains.
-  These are stored as lists of offsets to the vertices in the domain.
-  Note that vertices in the merging domain must appear in order from finest
-   scale (largest level) to coarsest (smallest level).
-  Each list ends with the vertex (0,0), the actual vertex be decimated.*/
-/*Level 4 vertex:
-            4
-*/
-static const od_offset OD_MERGEDOM4[1]={
-  {0,0},
-};
-
-/*Level 3 vertex:
-            4
-          4-3-4
-            4
-*/
-static const od_offset OD_MERGEDOM3[5]={
-  { 0,-1},{-1, 0},{ 1, 0},{ 0, 1},{ 0, 0}
-};
-
-/*Level 2 vertex:
-          4   4
-          |   |
-        4-3-4-3-4
-          | | |
-          4-2-4
-          | | |
-        4-3-4-3-4
-          |   |
-          4   4
-*/
-static const od_offset OD_MERGEDOM2[17]={
-  {-1,-2},{ 1,-2},{-2,-1},{ 0,-1},{ 2,-1},{-1, 0},{ 1, 0},{-2, 1},
-  { 0, 1},{ 2, 1},{-1, 2},{ 1, 2},{-1,-1},{ 1,-1},{-1, 1},{ 1, 1},
-  { 0, 0}
-};
-
-/*Level 1 vertex:
-          4   4
-          |   |
-        4-3-4-3-4
-          | | |
-      4   4-2-4   4
-      |   | | |   |
-    4-3-4-3-4-3-4-3-4
-      | | | | | | |
-      4-2-4-1-4-2-4
-      | | | | | | |
-    4-3-4-3-4-3-4-3-4
-      |   | | |   |
-      4   4-2-4   4
-          | | |
-        4-3-4-3-4
-          |   |
-          4   4
-*/
-static const od_offset OD_MERGEDOM1[49]={
-  {-1,-4},{ 1,-4},{-2,-3},{ 0,-3},{ 2,-3},{-3,-2},{-1,-2},{ 1,-2},
-  { 3,-2},{-4,-1},{-2,-1},{ 0,-1},{ 2,-1},{ 4,-1},{-3, 0},{-1, 0},
-  { 1, 0},{ 3, 0},{-4, 1},{-2, 1},{ 0, 1},{ 2, 1},{ 4, 1},{-3, 2},
-  {-1, 2},{ 1, 2},{ 3, 2},{-2, 3},{ 0, 3},{ 2, 3},{-1, 4},{ 1, 4},
-  {-1,-3},{ 1,-3},{-3,-1},{-1,-1},{ 1,-1},{ 3,-1},{-3, 1},{-1, 1},
-  { 1, 1},{ 3, 1},{-1, 3},{ 1, 3},{ 0,-2},{-2, 0},{ 2, 0},{ 0, 2},
-  { 0, 0}
-};
-
-/*The merging domain for a vertex, indexed by level-1.*/
-static const od_offset *OD_MERGEDOM[4]={
-  OD_MERGEDOM1,
-  OD_MERGEDOM2,
-  OD_MERGEDOM3,
-  OD_MERGEDOM4
-};
-
-/*Error support regions.
-  These are the blocks whose SAD will change after decimating a vertex at a
-   given level, assuming no other vertices in the mesh have been decimated.
-  Vertices in the figures at a higher level than the one removed illustrate one
-   possible configuration; there may be others.*/
-struct od_mv_err_node{
-  int dx;
-  int dy;
-  int log_mvb_sz;
-};
-
-/*Level 4 support:
-          4-3-4
-          |/|\|
-          2-.-1
-          |\|/|
-          4-3-4
-*/
-static const od_mv_err_node OD_ERRDOM4[4]={
-  {-1,-1,0},{ 0,-1,0},{-1, 0,0},{ 0, 0,0}
-};
-
-/*Level 3 support:
-          4-3-4
-          |/|\|
-        4-0-.-2-4
-        |/|   |\|
-        3-.   .-3
-        |\|   |/|
-        4-2-.-1-4
-          |\|/|
-          4-3-4
-*/
-static const od_mv_err_node OD_ERRDOM3[9]={
-  {-1,-2,0},{ 0,-2,0},{-2,-1,0},{ 1,-1,0},
-  {-2, 0,0},{ 1, 0,0},{-1, 1,0},{ 0, 1,0},
-  {-1,-1,1}
-};
-
-/*Level 2 support:
-        4-3-4-3-4
-        |/|\|/|\|
-      4-2-.-1-.-2-4
-      |/|  /|\  |\|
-      3-. / | \ .-3
-      |\|/  |  \|/|
-      4-0---.---0-4
-      |/|\  |  /|\|
-      3-. \ | / .-3
-      |\|  \|/  |/|
-      4-2-.-1-.-2-4
-        |\|/|\|/|
-        4-3-4-3-4
-*/
-static const od_mv_err_node OD_ERRDOM2[20]={
-  {-2,-3,0},{-1,-3,0},{ 0,-3,0},{ 1,-3,0},
-  {-3,-2,0},{ 2,-2,0},{-3,-1,0},{ 2,-1,0},
-  {-3, 0,0},{ 2, 0,0},{-3, 1,0},{ 2, 1,0},
-  {-2, 2,0},{-1, 2,0},{ 0, 2,0},{ 1, 2,0},
-  {-2,-2,1},{ 0,-2,1},{-2, 0,1},{ 0, 0,1}
-};
-
-/*Level 1 support:
-        4-3-4-3-4
-        |/|\|/|\|
-      4-2-.-1-.-2-4
-      |/|  /|\  |\|
-    4-3-. / | \ .-3-4
-    |/| |/  |  \| |\|
-  4-2-.-0---.---0-.-2-4
-  |/|  /|       |\  |\|
-  3-. / |       | \ .-3
-  |\|/  |       |  \|/|
-  4-1---.       .---1-4
-  |/|\  |       |  /|\|
-  3-. \ |       | / .-3
-  |\|  \|       |/  |/|
-  4-2-.-0---.---0-.-2-4
-    |\| |\  |  /| |/|
-    4-3-. \ | / .-3-4
-      |\|  \|/  |/|
-      4-2-.-1-.-2-4
-        |\|/|\|/|
-        4-3-4-3-4
-*/
-static const od_mv_err_node OD_ERRDOM1[37]={
-  {-2,-5,0},{-1,-5,0},{ 0,-5,0},{ 1,-5,0},
-  {-3,-4,0},{ 2,-4,0},{-4,-3,0},{-3,-3,0},
-  { 2,-3,0},{ 3,-3,0},{-5,-2,0},{ 4,-2,0},
-  {-5,-1,0},{ 4,-1,0},{-5, 0,0},{ 4, 0,0},
-  {-5, 1,0},{ 4, 1,0},{-4, 2,0},{-3, 2,0},
-  { 2, 2,0},{ 3, 2,0},{-3, 3,0},{ 2, 3,0},
-  {-2, 4,0},{-1, 4,0},{ 0, 4,0},{ 1, 4,0},
-  {-2,-4,1},{ 0,-4,1},{-4,-2,1},{ 2,-2,1},
-  {-4, 0,1},{ 2, 0,1},{-2, 2,1},{ 0, 2,1},
-  {-2,-2,2}
-};
-
-/*The number of blocks in each decimated error domain.*/
-static const int OD_NERRDOM[4]={37,20,9,4};
-/*The error domain for a vertex, indexed by level-1.*/
-static const od_mv_err_node *OD_ERRDOM[4]={
-  OD_ERRDOM1,
-  OD_ERRDOM2,
-  OD_ERRDOM3,
-  OD_ERRDOM4
-};
-
-/*Returns -1, 0, or 1, depending if -_dd1/_dr1 is less, equal or greater than
-   -_dd2/_dr2.*/
-static int od_mv_dddr_cmp(ogg_int32_t _dd1,int _dr1,
- ogg_int32_t  _dd2,int _dr2){
-  ogg_int64_t diff;
-  /*dr==0 and dd!=0 should not be possible, but we check for it anyway just in
-     case, to prevent a bug from trashing the whole optimization process.*/
-  if(_dr1==0)return _dr2==0?OD_SIGNI(_dd2-_dd1):_dd1<=0?-1:1;
-  else if(_dr2==0)return _dd2<=0?1:-1;
-  diff=_dd2*(ogg_int64_t)_dr1-_dd1*(ogg_int64_t)_dr2;
-  return OD_SIGNI(diff);
-}
-
-/*Compare two nodes on the decimation heap.*/
-static int od_mv_dec_cmp(od_mv_node *_n1,od_mv_node *_n2){
-  return od_mv_dddr_cmp(_n1->dd,_n1->dr,_n2->dd,_n2->dr);
-}
-
-/*Swap the two nodes on the decimation heap at indices _p and _q.*/
-static void od_mv_dec_heap_swap(od_mv_node **_heap,int _p,int _q){
-  od_mv_node *t;
-  _heap[_p]->heapi=_q;
-  _heap[_q]->heapi=_p;
-  t=_heap[_p];
-  _heap[_p]=_heap[_q];
-  _heap[_q]=t;
-}
-
-/*Convert the list of nodes to be decimated to a heap.*/
-static void od_mv_dec_heapify(od_mv_est_ctx *_est){
-  od_mv_node **heap;
-  int          l;
-  int          r;
-  int          i;
-  heap=_est->dec_heap;
-  l=_est->dec_nheap>>1;
-  r=_est->dec_nheap-1;
-  for(i=l;i-->0;){
-    int p;
-    p=i;
-    do{
-      int q;
-      q=(p<<1)+1;
-      if(q<r&&od_mv_dec_cmp(heap[q],heap[q+1])>=0)q++;
-      if(od_mv_dec_cmp(heap[p],heap[q])<=0)break;
-      od_mv_dec_heap_swap(heap,p,q);
-      p=q;
-    }
-    while(p<l);
-  }
-}
-
-/*Restore the heap structure at the given index by moving it down the heap.*/
-static void od_mv_dec_heap_down(od_mv_est_ctx *_est,int _heapi){
-  od_mv_node **heap;
-  int          l;
-  int          r;
-  int          p;
-  heap=_est->dec_heap;
-  l=_est->dec_nheap>>1;
-  r=_est->dec_nheap-1;
-  p=_heapi;
-  while(p<l){
-    int q;
-    q=(p<<1)+1;
-    if(q<r&&od_mv_dec_cmp(heap[q],heap[q+1])>=0)q++;
-    if(od_mv_dec_cmp(heap[p],heap[q])<=0)break;
-    od_mv_dec_heap_swap(heap,p,q);
-    p=q;
-  }
-}
-
-/*Restore the heap structure at the given index by moving it up the heap.*/
-static void od_mv_dec_heap_up(od_mv_est_ctx *_est,int _heapi){
-  od_mv_node **heap;
-  int          p;
-  heap=_est->dec_heap;
-  p=_heapi;
-  while(p>0){
-    int q;
-    q=p;
-    p=(q+1>>1)-1;
-    if(od_mv_dec_cmp(heap[p],heap[q])<=0)break;
-    od_mv_dec_heap_swap(heap,p,q);
-  }
-}
-
-/*Retrieve the item at the top of the heap.
-  Returns NULL if there are no more nodes to decimate.*/
-static od_mv_node *od_mv_dec_heap_delhead(od_mv_est_ctx *_est){
-  od_mv_node *ret;
-  if(_est->dec_nheap<=0)return NULL;
-  ret=_est->dec_heap[0];
-  ret->heapi=-1;
-  if(--_est->dec_nheap>0){
-    _est->dec_heap[0]=_est->dec_heap[_est->dec_nheap];
-    _est->dec_heap[0]->heapi=0;
-    od_mv_dec_heap_down(_est,0);
-  }
-  return ret;
-}
-
-static void od_mv_dec_heap_del(od_mv_est_ctx *_est,od_mv_node *_node){
-  int heapi;
-  heapi=_node->heapi;
-  if(heapi>=0){
-    _node->heapi=-1;
-    _est->dec_nheap--;
-    if(_est->dec_nheap>heapi){
-      _est->dec_heap[heapi]=_est->dec_heap[_est->dec_nheap];
-      _est->dec_heap[heapi]->heapi=heapi;
-      if(od_mv_dec_cmp(_node,_est->dec_heap[heapi])>=0){
-        od_mv_dec_heap_up(_est,heapi);
-      }
-      else od_mv_dec_heap_down(_est,heapi);
-    }
-    else _est->dec_heap[_est->dec_nheap]=NULL;
-  }
-}
-
-/*Sets the dd and dr values of the given node, restoring the heap structure
-   afterwards.*/
-static void od_mv_dec_update(od_mv_est_ctx *_est,od_mv_node *_node,
- int _dd,int _dr){
-  int diff;
-  diff=od_mv_dddr_cmp(_dd,_dr,_node->dd,_node->dr);
-  _node->dd=_dd;
-  _node->dr=_dr;
-  if(_node->heapi>=0){
-    if(diff<=0)od_mv_dec_heap_up(_est,_node->heapi);
-    else od_mv_dec_heap_down(_est,_node->heapi);
-  }
-}
-
-/*Computes the SAD of a top-level block.
-  We can't use od_mv_est_block_sad8 because the blocks haven't actually been
-   decimated to the correct size yet.*/
-static ogg_int32_t od_mv_est_dec_sad_top8(od_mv_est_ctx *_est,int _ref,
- int _vx,int _vy){
-  od_state      *state;
-  unsigned char  pred[16][16];
-  state=&_est->enc->state;
-  od_state_pred_block_from_setup(state,pred[0],sizeof(pred[0]),_ref,0,
-   _vx,_vy,0,3,2);
-  return od_state_sad8(state,pred[0],sizeof(pred[0]),1,_vx-2<<2,_vy-2<<2,4);
-}
-
-/*Computes the SAD of the specified block with all possible edge splittings.
-  While we're here, we return the index of the exterior corner, which is needed
-   by our caller.
-  We can't use od_mv_est_block_sad8 because the blocks haven't actually been
-   decimated to the correct size yet, nor the edge-split state filled in.*/
-static int od_mv_est_dec_sad_all8(od_mv_est_ctx *_est,ogg_int32_t _sads[4],
- int _ref,int _vx,int _vy,int _log_mvb_sz){
-  od_state      *state;
-  unsigned char  pred[16][16];
-  int            mask;
-  int            s;
-  int            c;
-  state=&_est->enc->state;
-  mask=(1<<_log_mvb_sz+1)-1;
-  c=!!(_vx&mask);
-  if(_vy&mask)c=3-c;
-  /*fprintf(stderr,"Computing SADs for (%i,%i) (%ix%i) c:%i\n",_vx-2<<2,_vy-2<<2,1<<_log_blk_sz,1<<_log_blk_sz,c);*/
-  for(s=0;s<4;s++){
-    od_state_pred_block_from_setup(state,pred[0],sizeof(pred[0]),_ref,0,
-     _vx,_vy,c,s,_log_mvb_sz);
-    _sads[s]=od_state_sad8(state,pred[0],sizeof(pred[0]),1,_vx-2<<2,_vy-2<<2,
-     _log_mvb_sz+2);
-  }
-  return c;
-}
-
-/*Computes the SAD of all blocks at all scales with all possible edge
-   splittings, using OBMC.
-  These are what will drive the error of the adaptive subdivision process.*/
-static void od_mv_est_calc_sads(od_mv_est_ctx *_est,int _ref){
-  od_state *state;
-  int       nhmvbs;
-  int       nvmvbs;
-  int       vx;
-  int       vy;
-  int       s;
-  state=&_est->enc->state;
-  /*TODO: Interleaved evaluation would probably provide better cache
-     coherency.*/
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  for(vy=0;vy<nvmvbs;vy++){
-    od_mv_node *mv_row;
-    mv_row=_est->mvs[vy];
-    for(vx=0;vx<nhmvbs;vx++){
-      ogg_int32_t sads[4];
-      /*While we're here, fill in the block's setup sate.*/
-      mv_row[vx].c=od_mv_est_dec_sad_all8(_est,sads,_ref,vx,vy,0);
-      mv_row[vx].s=3;
-      mv_row[vx].log_mvb_sz=0;
-      mv_row[vx].sad=sads[3];
-      for(s=0;s<4;s++)_est->sad_cache[0][vy][vx][s]=(ogg_uint16_t)sads[s];
-    }
-  }
-  nhmvbs>>=1;
-  nvmvbs>>=1;
-  for(vy=0;vy<nvmvbs;vy++){
-    for(vx=0;vx<nhmvbs;vx++){
-      ogg_int32_t sads[4];
-      od_mv_est_dec_sad_all8(_est,sads,_ref,vx<<1,vy<<1,1);
-      for(s=0;s<4;s++)_est->sad_cache[1][vy][vx][s]=(ogg_uint16_t)sads[s];
-    }
-  }
-}
-
-static void od_mv_est_calc_node_bits(od_mv_est_ctx *_est){
-  od_state      *state;
-  od_mv_node    *mv_row;
-  od_mv_grid_pt *grid;
-  int            nhmvbs;
-  int            nvmvbs;
-  int            etype;
-  int            ebits;
-  int            vx;
-  int            vy;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  if(_est->flags&OD_MC_USEV){
-    if(_est->flags&OD_MC_USEB){
-      etype=0;
-      ebits=3;
-    }
-    else{
-      etype=1;
-      ebits=0;
-    }
-  }
-  else etype=ebits=0;
-  for(vy=0;vy<=nvmvbs;vy++){
-    mv_row=_est->mvs[vy];
-    grid=state->mv_grid[vy];
-    for(vx=0;vx<=nhmvbs;vx++){
-      int level;
-      /*While we're here, reset the MV state.*/
-      grid[vx].valid=1;
-      grid[vx].right=etype;
-      grid[vx].down=etype;
-      level=OD_MC_LEVEL[vy&3][vx&3];
-      /*Motion vectors outside of the frame are not transmitted.*/
-      if(vx<2||vx>nhmvbs-2||vy<2||vy>nvmvbs-2)mv_row[vx].dr=level<4?-2:0;
-      else{
-        mv_row[vx].dr=-mv_row[vx].mv_rate;
-        /*Vertices on even levels require new edge labels.*/
-        if(!(level&1))mv_row[vx].dr-=ebits;
-        /*Except at the lowest level, vertices require on average 2 bits
-           indicating the presence of children.*/
-        if(level<4)mv_row[vx].dr-=2;
-      }
-    }
-  }
-}
-
-static void od_mv_est_init_du(od_mv_est_ctx *_est,int _ref,int _vx,int _vy){
-  od_state             *state;
-  od_mv_node           *dec;
-  od_mv_node           *merge;
-  const od_mv_err_node *errdom;
-  int                   nerrdom;
-  const od_offset      *mergedom;
-  int                   nhmvbs;
-  int                   nvmvbs;
-  int                   level;
-  int                   di;
-  int                   vx;
-  int                   vy;
-  int                   dx;
-  int                   dy;
-  /*fprintf(stderr,"Computing du's for (%i,%i)\n",_vx,_vy);*/
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  dec=_est->mvs[_vy]+_vx;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  errdom=OD_ERRDOM[level-1];
-  nerrdom=OD_NERRDOM[level-1];
-  mergedom=OD_MERGEDOM[level-1];
-  dec->dd=0;
-  /*Subtract off the error before decimation.*/
-  for(di=0;di<nerrdom;di++){
-    vx=_vx+errdom[di].dx;
-    vy=_vy+errdom[di].dy;
-    if(vx>=0&&vy>=0&&vx<nhmvbs&&vy<nvmvbs){
-      int mvb_sz;
-      mvb_sz=1<<errdom[di].log_mvb_sz;
-      for(dy=0;dy<mvb_sz;dy++){
-        for(dx=0;dx<mvb_sz;dx++){dec->dd-=_est->sad_cache[0][vy+dy][vx+dx][3];/*fprintf(stderr,"Added error (%i,%i) [%ix%i]: %i\n",vx+dx,vy+dy,4,4,dec->dd);*/}
-      }
-    }
-    /*else fprintf(stderr,"(%i,%i) outside [%i,%i]x[%i,%i]\n",vx,vy,0,0,nhmvbs,nvmvbs);*/
-  }
-  /*fprintf(stderr,"Subtracted initial error: %i\n",dec->dd);*/
-  /*Decimate the vertices in the merging domain.
-    Also sum up the rate changes while we do it.*/
-  for(di=0;;di++){
-    vx=_vx+mergedom[di][0];
-    if(vx<0||vx>nhmvbs)continue;
-    vy=_vy+mergedom[di][1];
-    if(vy<0||vy>nvmvbs)continue;
-    state->mv_grid[vy][vx].valid=0;
-    merge=_est->mvs[vy]+vx;
-    if(merge==dec)break;
-    dec->dr+=merge->dr;
-    /*fprintf(stderr,"Merged vertex (%2i,%2i), dr: %i\n",vx,vy,dec->dr);*/
-  }
-  /*fprintf(stderr,"Merged vertex (%2i,%2i)\n",vx,vy);*/
-  /*fprintf(stderr,"Decimated vertices in merging domain.\n");*/
-  /*Add in the error after decimation.*/
-  for(di=0;di<nerrdom;di++){
-    vx=_vx+errdom[di].dx;
-    vy=_vy+errdom[di].dy;
-    if(vx>=0&&vy>=0&&vx<nhmvbs&&vy<nvmvbs){
-      int log_mvb_sz;
-      log_mvb_sz=errdom[di].log_mvb_sz;
-      if(log_mvb_sz<2){
-        int mask;
-        int c;
-        int s;
-        mask=(1<<log_mvb_sz+1)-1;
-        c=!!(vx&mask);
-        if(vy&mask)c=3-c;
-        s=state->mv_grid[vy+(OD_VERT_DY[c+1&3]<<log_mvb_sz)][
-         vx+(OD_VERT_DX[c+1&3]<<log_mvb_sz)].valid|
-         state->mv_grid[vy+(OD_VERT_DY[c+3&3]<<log_mvb_sz)][
-         vx+(OD_VERT_DX[c+3&3]<<log_mvb_sz)].valid<<1;
-        dec->dd+=
-         _est->sad_cache[log_mvb_sz][vy>>log_mvb_sz][vx>>log_mvb_sz][s];
-        /*fprintf(stderr,"Added error (%i,%i) [%ix%i] {%i,%i}: %i\n",vx,vy,1<<log_mvb_sz+2,1<<log_mvb_sz+2,c,s,dec->dd);*/
-      }
-      else{
-        /*Cache the SAD for top-level blocks in the dd field, which is
-           otherwise unused (since they cannot be decimated).*/
-        _est->mvs[vy][vx].dd=od_mv_est_dec_sad_top8(_est,_ref,vx,vy);
-        dec->dd+=_est->mvs[vy][vx].dd;
-        /*fprintf(stderr,"Added error (%i,%i) [%ix%i]: %i\n",
-         vx,vy,1<<log_mvb_sz+2,1<<log_mvb_sz+2,dec->dd);*/
-      }
-    }
-  }
-  /*fprintf(stderr,"Total merging error: %i\n",dec->dd);*/
-  /*Restore the vertices in the merging domain.*/
-  for(di=0;;di++){
-    vx=_vx+mergedom[di][0];
-    if(vx<0||vx>nhmvbs)continue;
-    vy=_vy+mergedom[di][1];
-    if(vy<0||vy>nvmvbs)continue;
-    state->mv_grid[vy][vx].valid=1;
-    if(vx==_vx&&vy==_vy)break;
-  }
-  /*fprintf(stderr,"Restored vertices in merging domain.\n");*/
-  /*Add this node to the heap.*/
-  dec->heapi=_est->dec_nheap;
-  _est->dec_heap[_est->dec_nheap++]=dec;
-}
-
-static void od_mv_est_init_dus(od_mv_est_ctx *_est,int _ref){
-  od_state *state;
-  int       nhmvbs;
-  int       nvmvbs;
-  int       vx;
-  int       vy;
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2;
-  nvmvbs=state->nvmbs+1<<2;
-  od_mv_est_calc_node_bits(_est);
-  /*fprintf(stderr,"Finished MV bits.\n");*/
-  od_mv_est_calc_sads(_est,_ref);
-  /*fprintf(stderr,"Finished SADs.\n");*/
-  /*Clear the merge heap.*/
-  _est->dec_nheap=0;
-  _est->dec_heap[0]=NULL;
-  /*The initialization is destructive to dr, and so must proceed by level from
-     top to bottom.*/
-  /*Level 1 vertices.*/
-  for(vy=2;vy<=nvmvbs;vy+=4){
-    for(vx=2;vx<=nhmvbs;vx+=4)od_mv_est_init_du(_est,_ref,vx,vy);
-  }
-  /*Level 2 vertices.*/
-  for(vy=0;;vy+=2){
-    for(vx=2;vx<=nhmvbs;vx+=4)od_mv_est_init_du(_est,_ref,vx,vy);
-    vy+=2;
-    if(vy>nvmvbs)break;
-    for(vx=0;vx<=nhmvbs;vx+=4)od_mv_est_init_du(_est,_ref,vx,vy);
-  }
-  /*Level 3 vertices.*/
-  for(vy=1;vy<=nvmvbs;vy+=2){
-    for(vx=1;vx<=nhmvbs;vx+=2)od_mv_est_init_du(_est,_ref,vx,vy);
-  }
-  /*Level 4 vertices.*/
-  for(vy=0;;vy++){
-    for(vx=1;vx<=nhmvbs;vx+=2)od_mv_est_init_du(_est,_ref,vx,vy);
-    vy++;
-    if(vy>nvmvbs)break;
-    for(vx=0;vx<=nhmvbs;vx+=2)od_mv_est_init_du(_est,_ref,vx,vy);
-  }
-  /*Make the node list into a proper heap.*/
-  od_mv_dec_heapify(_est);
-}
-/*Greedily decimates the MV mesh.
-  After od_mv_est_init_dus() has run, the head of the decimation heap is
-   removed repeatedly and merged, along with the still-valid vertices listed in
-   its merging domain (OD_MERGEDOM), until the heap is empty or the head's
-   dr*lambda+(dd<<OD_LAMBDA_SCALE) is positive.
-  _est: The motion estimation context.
-  _ref: The reference frame index, forwarded to od_mv_est_init_dus().*/
-static void od_mv_est_decimate(od_mv_est_ctx *_est,int _ref){
-  od_mv_node *dec;
-  od_state   *state;
-  int         nhmvbs;
-  int         nvmvbs;
-  int         vx;
-  int         vy;
-  od_mv_est_init_dus(_est,_ref);
-  /*fprintf(stderr,"%i %i %i %i\n",
-   _est->sad_cache[0][30][2][0],
-   _est->sad_cache[0][30][2][1],
-   _est->sad_cache[0][30][2][2],
-   _est->sad_cache[0][30][2][3]);
-  fprintf(stderr,"%i %i %i %i\n",
-   _est->sad_cache[0][31][2][0],
-   _est->sad_cache[0][31][2][1],
-   _est->sad_cache[0][31][2][2],
-   _est->sad_cache[0][31][2][3]);*/
-  state=&_est->enc->state;
-  nhmvbs=state->nhmbs+1<<2; /*Parsed as (nhmbs+1)<<2.*/
-  nvmvbs=state->nvmbs+1<<2; /*Parsed as (nvmbs+1)<<2.*/
-  /*Mark each column full of vectors.*/
-  for(vx=0;vx<=nhmvbs;vx++)_est->col_counts[vx]=nvmvbs+1;
-  /*Mark each row full of vectors.*/
-  for(vy=0;vy<=nvmvbs;vy++)_est->row_counts[vy]=nhmvbs+1;
-  for(;;){
-    const od_offset *mergedom;
-    int              level;
-    int              di;
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-    if(daala_granule_basetime(state,state->cur_time)==ANI_FRAME){
-      char iter_label[16];
-      od_state_mc_predict(state,_ref);
-      od_state_fill_vis(state);
-      sprintf(iter_label,"ani%08i",state->ani_iter++);
-      od_state_dump_img(state,&state->vis_img,iter_label);
-    }
-#endif
-    dec=od_mv_dec_heap_delhead(_est);
-    /*Stop if we've fully decimated the mesh, or if this decimation would not
-       improve R-D performance at the current lambda.*/
-    if(dec==NULL||dec->dr*_est->lambda+(dec->dd<<OD_LAMBDA_SCALE)>0)break;
-    level=OD_MC_LEVEL[dec->vy&3][dec->vx&3];
-    /*fprintf(stderr,"Iteration %i; Merging node (%2i,%2i), level %i, dd %5i, dr %5i, dopt %5i:\n",
-     iteration,dec->vx,dec->vy,level,dec->dd,dec->dr,
-     dec->dr*_est->lambda+(dec->dd<<OD_LAMBDA_SCALE));*/
-    mergedom=OD_MERGEDOM[level-1];
-    /*Walk the merging domain; the loop only ends once dec itself has been
-       merged (see the break at the bottom).*/
-    for(di=0;;di++){
-      od_mv_node      *merge;
-      od_mv_node      *ancestor;
-      od_mv_node      *block;
-      const od_offset *anc;
-      int              nanc;
-      int              ai;
-      int              ax;
-      int              ay;
-      int              bx;
-      int              by;
-      int              log_mvb_sz;
-      int              mask;
-      /*Don't decimate vertices outside of the mesh.*/
-      vx=dec->vx+mergedom[di][0];
-      if(vx<0||vx>nhmvbs)continue;
-      vy=dec->vy+mergedom[di][1];
-      if(vy<0||vy>nvmvbs)continue;
-      merge=_est->mvs[vy]+vx;
-      /*Don't decimate vertices that have already been decimated.*/
-      if(!state->mv_grid[vy][vx].valid){/*fprintf(stderr,"Skipping node (%i,%i) (already merged).\n",vx,vy);*/continue;}
-      /*fprintf(stderr,"Merging node (%2i,%2i), dd %5i, dr %5i:\n",vx,vy,
-       merge->dd,merge->dr);*/
-      /*Update the deltas for this vertex in the merging domain.
-        The simple rule applied below handles overlapped domains with an
-         inclusion-exclusion approach.
-        See Balmelli 2001 for details.*/
-      nanc=OD_NANCESTORS[vy&3][vx&3];
-      anc=OD_ANCESTORS[vy&3][vx&3];
-      for(ai=0;ai<nanc;ai++){
-        ax=vx+anc[ai][0];
-        if(ax<0||ax>nhmvbs)continue;
-        ay=vy+anc[ai][1];
-        if(ay<0||ay>nvmvbs)continue;
-        ancestor=_est->mvs[ay]+ax;
-        od_mv_dec_update(_est,ancestor,
-         ancestor->dd-merge->dd,ancestor->dr-merge->dr);
-        /*fprintf(stderr,"Updated ancestor (%2i,%2i) of (%2i,%2i): dd %5i, dr %5i\n",
-         ax,ay,vx,vy,ancestor->dd,ancestor->dr);*/
-      }
-      state->mv_grid[vy][vx].valid=0;
-      od_mv_dec_heap_del(_est,merge);
-      _est->col_counts[vx]--;
-      _est->row_counts[vy]--;
-      level=OD_MC_LEVEL[vy&3][vx&3];
-      log_mvb_sz=4-level>>1; /*Parsed as (4-level)>>1.*/
-      /*Account for quadrilaterals which may have only partially belonged to
-         the merging domain (e.g., that would not have belonged were we using
-         triangles).*/
-      if(!(level&1)){
-        static const int OD_CDX[4]={-1,1,-1,1};
-        static const int OD_CDY[4]={-1,-1,1,1};
-        int k;
-        mask=(1<<log_mvb_sz+1)-1; /*The shift count is log_mvb_sz+1.*/
-        for(k=0;k<4;k++){
-          int cx;
-          int cy;
-          int ddd;
-          int s;
-          cx=vx+(OD_CDX[k]<<log_mvb_sz);
-          if(cx<0||cx>nhmvbs)continue;
-          cy=vy+(OD_CDY[k]<<log_mvb_sz);
-          if(cy<0||cy>nvmvbs)continue;
-          bx=vx+(OD_ERRDOM4[k].dx<<log_mvb_sz);
-          by=vy+(OD_ERRDOM4[k].dy<<log_mvb_sz);
-          block=_est->mvs[by]+bx;
-          by>>=log_mvb_sz;
-          bx>>=log_mvb_sz;
-          if(!state->mv_grid[cy][cx].valid){
-            block->s=0;
-            block->sad=_est->sad_cache[log_mvb_sz][by][bx][0];
-            /*If the opposing corner has already been decimated, the remaining
-               adjustments have already been made.*/
-            continue;
-          }
-          /*s is the split state of the error block with (vx,vy) decimated, and
-             (cx,cy) undecimated.*/
-          s=1<<(((k+3&3)>>1)^!!(vx&mask)); /*k+3&3 parses as (k+3)&3.*/
-          block->s=s;
-          block->sad=_est->sad_cache[log_mvb_sz][by][bx][s];
-          /*Replace the old decimation error change with the new one.*/
-          ddd=_est->sad_cache[log_mvb_sz][by][bx][0]-
-           _est->sad_cache[log_mvb_sz][by][bx][s^3]+
-           _est->sad_cache[log_mvb_sz][by][bx][3]-
-           _est->sad_cache[log_mvb_sz][by][bx][s];
-          /*fprintf(stderr,"Checking opposing corner (%2i,%2i): ddd %i\n",
-           cx,cy,ddd);*/
-          /*This happens in regions of constant motion.*/
-          if(ddd==0)continue;
-          ancestor=_est->mvs[cy]+cx;
-          od_mv_dec_update(_est,ancestor,ancestor->dd+ddd,ancestor->dr);
-          /*fprintf(stderr,"Updated corner (%2i,%2i): dd %5i, dr %5i\n",
-           cx,cy,ancestor->dd,ancestor->dr);*/
-          /*Update the opposing corner's ancestors, which also, of
-             necessity, must contain the affected quadrilateral, and must
-             not have been decimated yet.*/
-          nanc=OD_NANCESTORS[cy&3][cx&3];
-          anc=OD_ANCESTORS[cy&3][cx&3];
-          for(ai=0;ai<nanc;ai++){
-            ax=cx+anc[ai][0];
-            if(ax<0||ax>nhmvbs)continue;
-            ay=cy+anc[ai][1];
-            if(ay<0||ay>nvmvbs)continue;
-            ancestor=_est->mvs[ay]+ax;
-            od_mv_dec_update(_est,ancestor,ancestor->dd+ddd,ancestor->dr);
-            /*fprintf(stderr,"Updated ancestor (%2i,%2i): dd %5i, dr %5i\n",
-             ax,ay,ancestor->dd,ancestor->dr);*/
-          }
-          /*Add back in the components that do not apply to the interior
-             corner.*/
-          ddd=-ddd;
-          if(vx&mask)cx=vx;
-          else cy=vy;
-          /*fprintf(stderr,"Checking interior corner (%2i,%2i): ddd %i\n",
-           cx,cy,ddd);*/
-          ancestor=_est->mvs[cy]+cx;
-          od_mv_dec_update(_est,ancestor,ancestor->dd+ddd,ancestor->dr);
-          /*fprintf(stderr,"Updated corner (%2i,%2i): dd %5i, dr %5i\n",
-           cx,cy,ancestor->dd,ancestor->dr);*/
-          /*And update all the interior corner's ancestors, which also, of
-             necessity, must contain the affected quadrilateral, and must not
-             have been decimated yet.*/
-          nanc=OD_NANCESTORS[cy&3][cx&3];
-          anc=OD_ANCESTORS[cy&3][cx&3];
-          for(ai=0;ai<nanc;ai++){
-            ax=cx+anc[ai][0];
-            if(ax<0||ax>nhmvbs)continue;
-            ay=cy+anc[ai][1];
-            if(ay<0||ay>nvmvbs)continue;
-            ancestor=_est->mvs[ay]+ax;
-            od_mv_dec_update(_est,ancestor,ancestor->dd+ddd,ancestor->dr);
-            /*fprintf(stderr,"Updated ancestor (%2i,%2i): dd %5i, dr %5i\n",
-             ax,ay,ancestor->dd,ancestor->dr);*/
-          }
-        }
-      }
-      /*Otherwise, we eliminated several smaller blocks.
-        Update the SAD and block setup for the larger block that took their
-         place.*/
-      else{
-        int c;
-        bx=vx-(1<<log_mvb_sz);
-        by=vy-(1<<log_mvb_sz);
-        log_mvb_sz++;
-        mask=(1<<log_mvb_sz+1)-1; /*The shift count is log_mvb_sz+1.*/
-        c=!!(bx&mask);
-        if(by&mask)c=3-c;
-        block=_est->mvs[by]+bx;
-        block->log_mvb_sz=log_mvb_sz;
-        block->c=c;
-        block->s=3;
-        if(log_mvb_sz<2){
-          block->sad=
-           _est->sad_cache[log_mvb_sz][by>>log_mvb_sz][bx>>log_mvb_sz][3];
-        }
-        /*At the top level, we cached the SAD in the dd field.*/
-        else block->sad=block->dd;
-      }
-      /*If we just decimated our target vertex, stop.*/
-      if(merge==dec)break;
-    }
-  }
-  /*od_mv_est_check_rd_state(_est,_ref,2);*/
-  /*fprintf(stderr,"Finished merging.\n");*/
-  /*if(dec!=NULL){
-    fprintf(stderr,"Node (%i,%i) dd %i, dr %i, dopt %i: not enough.\n",
-     dec->vx,dec->vy,dec->dd,dec->dr,
-     dec->dr*_est->lambda+(dec->dd<<OD_LAMBDA_SCALE));
-  }*/
-  /*if(state->mv_grid[31][1].valid){
-    dec=_est->mvs[31]+1;
-    fprintf(stderr,"(%i,%i) remains. dd: %5i, dr: %2i, dopt: %6i.\n",
-     dec->vx,dec->vy,dec->dd,dec->dr,
-     dec->dr*_est->lambda+(dec->dd<<OD_LAMBDA_SCALE));
-  }*/
-}
-
-
-
-/*STAGE 3: Iterated Dynamic Programming.*/
-
-
-
-/*The list of MVs that can be predicted by a level 0 MV, excluding those not
-   yet considered by DP across rows.
-  Each entry is an {x,y} offset from the predicting vertex.
-  In this and the following row lists, the changeable entries come first;
-   OD_NROW_PRED_CHANGEABLE holds how many there are at each level.*/
-static const od_offset OD_ROW_PREDICTED0[17]={
-  /*These predicted MVs are changeable by future MVs in the DP path.*/
-  { 2,-2},{ 1,-1},{ 2, 2},{ 1, 1},{ 0, 4},{ 4, 4},
-  /*The remaining ones are not.*/
-  {-2,-2},{ 0,-2},{-1,-1},{ 0,-1},{-1, 0},{-2, 0},
-  {-1, 1},{ 0, 1},{-2, 2},{ 0, 2},{-4, 4}
-};
-/*The list of MVs that can be predicted by a level 1 MV, excluding those
-   not yet considered by DP across rows.*/
-static const od_offset OD_ROW_PREDICTED1[10]={
-  /*These predicted MVs are changeable by future MVs in the DP path.*/
-  { 1,-1},{ 1, 1},
-  /*The remaining ones are not.*/
-  { 0,-2},{-1,-1},{ 0,-1},{-2, 0},{-1, 0},{-1, 1},{ 0, 1},{ 0, 2}
-};
-/*The list of MVs that can be predicted by a level 2 MV, excluding those
-   not yet considered by DP across rows.*/
-static const od_offset OD_ROW_PREDICTED2[7]={
-  /*These predicted MVs are changeable by future MVs in the DP path.*/
-  { 1,-1},{ 1, 1},
-  /*The remaining ones are not.*/
-  {-1,-1},{ 0,-1},{-1, 0},{-1, 1},{ 0, 1}
-};
-/*The list of MVs that can be predicted by a level 3 MV, excluding those
-   not yet considered by DP across rows.*/
-static const od_offset OD_ROW_PREDICTED3[3]={
-  /*These predicted MVs are NOT changeable by future MVs in the DP path.*/
-  { 0,-1},{-1, 0},{ 0, 1}
-};
-
-/*The list of MVs that can be predicted by a level 0 MV, excluding those not
-   yet considered by DP across columns.
-  In this and the following column lists, the changeable entries come first;
-   OD_NCOL_PRED_CHANGEABLE holds how many there are at each level.*/
-static const od_offset OD_COL_PREDICTED0[17]={
-  /*These predicted MVs are changeable by future MVs in the DP path.*/
-  { 2, 2},{-2, 2},{-1, 1},{ 1, 1},{ 4, 4},
-  /*The remaining ones are not.*/
-  {-2,-2},{ 0,-2},{ 2,-2},{-1,-1},{ 0,-1},{ 1,-1},
-  {-2, 0},{-1, 0},{ 1, 0},{ 2, 0},{ 4, 0},{-4, 4}
-};
-/*The list of MVs that can be predicted by a level 1 MV, excluding those
-   not yet considered by DP across columns.*/
-static const od_offset OD_COL_PREDICTED1[10]={
-  /*These predicted MVs are changeable by future MVs in the DP path.*/
-  {-1, 1},{ 1, 1},
-  /*The remaining ones are not.*/
-  { 0,-2},{-1,-1},{ 0,-1},{ 1,-1},{-2, 0},{-1, 0},{ 1, 0},{ 2, 0}
-};
-/*The list of MVs that can be predicted by a level 2 MV, excluding those
-   not yet considered by DP across columns.*/
-static const od_offset OD_COL_PREDICTED2[7]={
-  /*These predicted MVs are changeable by future MVs in the DP path.*/
-  {-1, 1},{ 1, 1},
-  /*The remaining ones are not.*/
-  {-1,-1},{ 0,-1},{ 1,-1},{-1, 0},{ 1, 0}
-};
-/*The list of MVs that can be predicted by a level 3 MV, excluding those
-   not yet considered by DP across columns.*/
-static const od_offset OD_COL_PREDICTED3[3]={
-  /*These predicted MVs are NOT changeable by future MVs in the DP path.*/
-  { 0,-1},{-1, 0},{ 1, 0}
-};
-
-/*The number of predicted MVs in each list, indexed by level.
-  NOTE(review): there are 5 entries but only 4 lists per direction; the final
-   0 presumably stands for a level that predicts nothing -- confirm.*/
-static const int OD_NPREDICTED[5]={17,10,7,3,0};
-/*The number of changeable predicted MVs at the start of each row list.*/
-static const int OD_NROW_PRED_CHANGEABLE[4]={6,2,2,0};
-/*The number of changeable predicted MVs at the start of each column list.*/
-static const int OD_NCOL_PRED_CHANGEABLE[4]={5,2,2,0};
-/*The lists of offsets to predicted MVs for each level, for row refinement.*/
-static const od_offset *const OD_ROW_PREDICTED[4]={
-  OD_ROW_PREDICTED0,
-  OD_ROW_PREDICTED1,
-  OD_ROW_PREDICTED2,
-  OD_ROW_PREDICTED3
-};
-/*The lists of offsets to predicted MVs for each level, for column
-   refinement.*/
-static const od_offset *const OD_COL_PREDICTED[4]={
-  OD_COL_PREDICTED0,
-  OD_COL_PREDICTED1,
-  OD_COL_PREDICTED2,
-  OD_COL_PREDICTED3
-};
-
-/*The amount of history to restore in the trellis state to ensure predicted MVs
-   are evaluated correctly in row refinement, indexed by level.*/
-static const int OD_ROW_PRED_HIST_SIZE[5]={8,4,2,2,1};
-/*The amount of history to restore in the trellis state to ensure predicted MVs
-   are evaluated correctly in column refinement, indexed by level.*/
-static const int OD_COL_PRED_HIST_SIZE[5]={8,4,2,2,1};
-
-
-
-/*Returns the boundary case indicating which motion vector range edges the
-   current motion vector is abutting.
-  _state:      The state supplying the frame width and height used to bound
-                the motion vector range.
-  _vx:         The horizontal position of the node.
-  _vy:         The vertical position of the node.
-  _dx:         The horizontal component of the motion vector.
-  _dy:         The vertical component of the motion vector.
-  _dsz:        The amount the vector is being adjusted by.
-  _log_blk_sz: The log base 2 of the maximum size of a block the vector can
-                belong to.
-  Return: A set of flags indicating the boundary conditions, after the
-   documentation at OD_SQUARE_SITES.
-          Bit 0 is set when _dx is at or below the minimum, bit 1 when it is
-           at or above the maximum less _dsz, and bits 2 and 3 are the same
-           tests for _dy.*/
-static int od_mv_est_get_boundary_case(od_state *_state,int _vx,int _vy,
- int _dx,int _dy,int _dsz,int _log_blk_sz){
-  int mvxmin;
-  int mvxmax;
-  int mvymin;
-  int mvymax;
-  int blk_sz;
-  int bx;
-  int by;
-  blk_sz=1<<_log_blk_sz;
-  /*Scale by multiplying rather than shifting: every value scaled below can
-     be negative (e.g., when _vx<2), and left-shifting a negative value is
-     undefined behavior in C.*/
-  bx=(_vx-2)*4;
-  by=(_vy-2)*4;
-  mvxmin=(OD_MAXI(bx-blk_sz-32,-16)-(bx-blk_sz))*8;
-  mvxmax=(OD_MINI(bx+blk_sz+32,_state->info.frame_width+16)-(bx+blk_sz))*8-
-   _dsz;
-  mvymin=(OD_MAXI(by-blk_sz-32,-16)-(by-blk_sz))*8;
-  mvymax=(OD_MINI(by+blk_sz+32,_state->info.frame_height+16)-(by+blk_sz))*8-
-   _dsz;
-  /*These shifts only ever see 0 or 1, so they are well defined.*/
-  return (_dx<=mvxmin)|((_dx>=mvxmax)<<1)|((_dy<=mvymin)<<2)|
-   ((_dy>=mvymax)<<3);
-}
-
-/*Computes the SAD of the specified block.
-  The block is first predicted into a local 16x16 buffer using its current
-   setup (position, c, s and log_mvb_sz), and od_state_sad8() then measures
-   that prediction.
-  _est:   The motion estimation context.
-  _ref:   The reference frame index to predict from.
-  _block: The block to measure.
-  Return: The value returned by od_state_sad8().*/
-static ogg_int32_t od_mv_est_block_sad8(od_mv_est_ctx *_est,int _ref,
- od_mv_node *_block){
-  unsigned char  buf[16][16];
-  od_state      *st;
-  st=&_est->enc->state;
-  od_state_pred_block_from_setup(st,buf[0],sizeof(buf[0]),_ref,0,
-   _block->vx,_block->vy,_block->c,_block->s,_block->log_mvb_sz);
-  return od_state_sad8(st,buf[0],sizeof(buf[0]),1,
-   _block->vx-2<<2,_block->vy-2<<2,_block->log_mvb_sz+2);
-}
-
-/*Gets the change in SAD for the blocks affected by the given DP node, using
-   the current state of the grid.
-  _est:        The motion estimation context.
-  _ref:        The reference frame index to predict from.
-  _dp:         The DP node whose blocks list is measured.
-  _block_sads: Returns the freshly computed SAD of each block in _dp->blocks.
-  Return: The sum over those blocks of the new SAD minus the SAD currently
-   stored in the block.*/
-static ogg_int32_t od_mv_dp_get_sad_change8(od_mv_est_ctx *_est,int _ref,
- od_mv_dp_node *_dp,ogg_int32_t _block_sads[8]){
-  ogg_int32_t total;
-  int         i;
-  total=0;
-  i=0;
-  while(i<_dp->nblocks){
-    od_mv_node *blk;
-    blk=_dp->blocks[i];
-    _block_sads[i]=od_mv_est_block_sad8(_est,_ref,blk);
-    total+=_block_sads[i]-blk->sad;
-    i++;
-  }
-  return total;
-}
-
-/*Computes a rate adjustment for the predictors changed by following the given
-   trellis path.
-  As a side effect, enough of the trellis needed to evaluate that change is
-   loaded into the MV grid.
-  _state:         The state whose MV grid is used for prediction.
-  _dp:            The current DP node.
-                  Its rate is computed from the MV currently stored in
-                   _dp->mvg, so the candidate MV must already be in the grid.
-  _cur_mv_rate:   Returns the new rate of the current node's MV.
-                  This is 0 for vertices within 2 of the grid boundary.
-  _pred_mv_rates: Returns the new rate of each MV the current node predicts.
-                  Only the first _dp->npredicted entries are written, and none
-                   are written for vertices within 2 of the grid boundary.
-  _prevsi:        The state index to follow in the previous DP node.
-  _mv_res:        The motion vector resolution (0=1/8th pel to 2=1/2 pel).
-  Return: The change in rate of the current MV plus that of the MVs it
-   predicts, relative to the mv_rate values stored in their nodes.*/
-static int od_mv_dp_get_rate_change(od_state *_state,od_mv_dp_node *_dp,
- int *_cur_mv_rate,int _pred_mv_rates[17],int _prevsi,int _mv_res){
-  od_mv_node    *mv;
-  od_mv_grid_pt *mvg;
-  int            nhmvbs;
-  int            nvmvbs;
-  int            pred[2];
-  int            pi;
-  int            dr;
-  /*Move the state from the current trellis path into the grid.*/
-  if(_dp->min_predictor_node!=NULL){
-    int            pred_sis[8]; /*NOTE(review): nothing here enforces npreds<=8.*/
-    int            pred_si;
-    int            npreds;
-    od_mv_dp_node *pred_dp;
-    npreds=_dp-_dp->min_predictor_node;
-    /*if(npreds>8)fprintf(stderr,"Too far back!\n");*/
-    /*fprintf(stderr,"Restoring ");*/
-    /*First, follow the trellis path backwards to find the state used in each
-       node.*/
-    pred_si=pred_sis[npreds-1]=_prevsi;
-    for(pi=2;pi<=npreds;pi++){
-      pred_dp=_dp-pi;
-      pred_si=pred_dp[1].states[pred_si].prevsi;
-      if(pred_si>=pred_dp[0].nstates)pred_si-=pred_dp[0].nstates;
-      pred_sis[npreds-pi]=pred_si;
-    }
-    /*Then restore that state going FORWARDS.*/
-    for(pred_dp=_dp->min_predictor_node;pred_dp<_dp;pred_dp++){
-      pred_si=pred_sis[pred_dp-_dp->min_predictor_node];
-      /*Restore the state for this MV itself.*/
-      pred_dp->mv->mv_rate=pred_dp->states[pred_si].mv_rate;
-      mvg=pred_dp->mvg;
-      mvg->mv[0]=pred_dp->states[pred_si].mv[0];
-      mvg->mv[1]=pred_dp->states[pred_si].mv[1];
-      /*fprintf(stderr,"(%i,%i:%i)->(%i,%i) ",
-       pred_dp->mv->vx,pred_dp->mv->vy,pred_si,mvg->mv[0],mvg->mv[1]);*/
-      /*Restore the state for the MVs this one predicted.*/
-      for(pi=0;pi<pred_dp->npred_changeable;pi++){
-        pred_dp->predicted_mvs[pi]->mv_rate=
-         pred_dp->states[pred_si].pred_mv_rates[pi];
-      }
-    }
-    /*fprintf(stderr,"\n");*/
-  }
-  nhmvbs=_state->nhmbs+1<<2; /*Parsed as (nhmbs+1)<<2.*/
-  nvmvbs=_state->nvmbs+1<<2; /*Parsed as (nvmbs+1)<<2.*/
-  /*Compute the new rate for the current MV.*/
-  mv=_dp->mv;
-  if(mv->vx<2||mv->vx>nhmvbs-2||mv->vy<2||mv->vy>nvmvbs-2)*_cur_mv_rate=dr=0;
-  else{
-    od_state_get_predictor(_state,pred,mv->vx,mv->vy,
-     OD_MC_LEVEL[mv->vy&3][mv->vx&3],_mv_res);
-    mvg=_dp->mvg;
-    *_cur_mv_rate=od_mv_est_bits(
-     (mvg->mv[0]>>_mv_res)-pred[0],(mvg->mv[1]>>_mv_res)-pred[1]);
-    /*fprintf(stderr,"Current MV rate: %i-%i=%i\n",
-     *_cur_mv_rate,mv->mv_rate,*_cur_mv_rate-mv->mv_rate);*/
-    dr=*_cur_mv_rate-mv->mv_rate;
-    /*Compute the new rates for the MVs this one predicts.*/
-    /*fprintf(stderr,
-     "Calculating predicted pred_mv_rates for node (%i,%i):\n",
-     _dp->mv->vx,_dp->mv->vy);*/
-    for(pi=0;pi<_dp->npredicted;pi++){
-      mv=_dp->predicted_mvs[pi];
-      mvg=_dp->predicted_mvgs[pi];
-      od_state_get_predictor(_state,pred,mv->vx,mv->vy,
-       OD_MC_LEVEL[mv->vy&3][mv->vx&3],_mv_res);
-      _pred_mv_rates[pi]=od_mv_est_bits(
-       (mvg->mv[0]>>_mv_res)-pred[0],(mvg->mv[1]>>_mv_res)-pred[1]);
-      /*fprintf(stderr,"Calculated predicted mv_rate of %i for (%i,%i)\n",
-       _pred_mv_rates[pi],mv->vx,mv->vy);
-      fprintf(stderr,"Predictor was: (%i,%i)   MV was: (%i,%i)\n",
-       pred[0],pred[1],mvg->mv[0]>>_mv_res,mvg->mv[1]>>_mv_res);*/
-      /*fprintf(stderr,"Predicted MV (%i,%i) rate: %i-%i=%i\n",
-       mv->vx,mv->vy,_pred_mv_rates[pi],mv->mv_rate,
-       _pred_mv_rates[pi]-mv->mv_rate);*/
-      dr+=_pred_mv_rates[pi]-mv->mv_rate;
-    }
-  }
-  return dr;
-}
-
-#if defined(OD_DUMP_IMAGES)&&defined(OD_ANIMATE)
-static const unsigned char OD_YCbCr_BEDGE[3]= { 41,240,110};
-static const unsigned char OD_YCbCr_VEDGE[3]= {145, 54, 34};
-static const unsigned char OD_YCbCr_VBEDGE[3]={170,166, 16};
-/*Dumps one animation frame visualizing the current state of the DP.
-  The prediction is regenerated and drawn, then the trellis is walked backwards
-   from _dp twice: once to draw the edges covered by the active trellis paths,
-   and once to draw the candidate MVs on those paths.
-  Each walk stops after a node whose states[0].prevsi is negative.
-  _state:   The state to draw; its vis_img and ani_iter are updated.
-  _ref:     The reference frame index passed to od_state_mc_predict().
-  _dp:      The DP node to start the backwards walks from.
-  _has_gap: If non-zero, the spacing between _dp and the node before it is not
-             halved when locating intermediate vertices, and no candidate MVs
-             are drawn at _dp itself.*/
-static void od_mv_dp_animate_state(od_state *_state,int _ref,
- od_mv_dp_node *_dp,int _has_gap){
-  od_mv_dp_node *dp;
-  char           iter_label[16];
-  int            active_states[OD_DP_NSTATES_MAX<<1];
-  int            prev_active_states[OD_DP_NSTATES_MAX<<1];
-  int            nactive_states;
-  int            nprev_active_states;
-  int            state;
-  int            si;
-  int            x0;
-  int            y0;
-  od_state_mc_predict(_state,_ref);
-  od_state_fill_vis(_state);
-  /*Now, draw the current state of the DP.*/
-  /*First draw the candidate edge labels for the active trellis paths.*/
-  for(si=0;si<_dp->nstates;si++){
-    prev_active_states[si<<1]=si;
-    prev_active_states[si<<1|1]=si+_dp->nstates;
-  }
-  nprev_active_states=_dp->nstates<<1;
-  nactive_states=0;
-  dp=_dp;
-  do{
-    int has_vedge;
-    int has_bedge;
-    if(nactive_states>0){
-      x0=(dp[0].mv->vx-2<<3)+(OD_UMV_PADDING<<1);
-      y0=(dp[0].mv->vy-2<<3)+(OD_UMV_PADDING<<1);
-      has_vedge=has_bedge=0;
-      for(si=0;si<nprev_active_states;si++){
-        if(prev_active_states[si]<dp[0].nstates)has_bedge=1;
-        else has_vedge=1;
-      }
-      if(has_vedge||has_bedge){
-        int mvb_sz;
-        int x1;
-        int y1;
-        x1=(dp[1].mv->vx-2<<3)+(OD_UMV_PADDING<<1);
-        y1=(dp[1].mv->vy-2<<3)+(OD_UMV_PADDING<<1);
-        od_img_draw_line(&_state->vis_img,x0,y0,x1,y1,
-         has_vedge?has_bedge?OD_YCbCr_VBEDGE:OD_YCbCr_VEDGE:OD_YCbCr_BEDGE);
-        if(dp[1].mv->vx-dp[0].mv->vx>1){
-          mvb_sz=dp[1].mv->vx-dp[0].mv->vx;
-          if(!_has_gap||dp+1!=_dp)mvb_sz>>=1;
-          if(!_state->mv_grid[dp[0].mv->vy][dp[0].mv->vx+mvb_sz].valid){
-            if(dp[0].mv->vy>=mvb_sz&&
-             _state->mv_grid[dp[0].mv->vy-mvb_sz][dp[0].mv->vx+mvb_sz].valid){
-              od_img_draw_line(&_state->vis_img,
-               x0+(mvb_sz<<3),y0-(mvb_sz<<3),x0+(mvb_sz<<3),y1,
-               has_vedge?has_bedge?OD_YCbCr_VBEDGE:
-               OD_YCbCr_VEDGE:OD_YCbCr_BEDGE);
-            }
-            if(dp[0].mv->vy<=(_state->nvmbs+1<<2)-mvb_sz&&
-             _state->mv_grid[dp[0].mv->vy+mvb_sz][dp[0].mv->vx+mvb_sz].valid){
-              od_img_draw_line(&_state->vis_img,
-               x0+(mvb_sz<<3),y0+(mvb_sz<<3),x0+(mvb_sz<<3),y1,
-               has_vedge?has_bedge?OD_YCbCr_VBEDGE:
-               OD_YCbCr_VEDGE:OD_YCbCr_BEDGE);
-            }
-          }
-        }
-        else if(dp[1].mv->vy-dp[0].mv->vy>1){
-          mvb_sz=dp[1].mv->vy-dp[0].mv->vy;
-          if(!_has_gap||dp+1!=_dp)mvb_sz>>=1;
-          if(!_state->mv_grid[dp[0].mv->vy+mvb_sz][dp[0].mv->vx].valid){
-            if(dp[0].mv->vx>=mvb_sz&&
-             _state->mv_grid[dp[0].mv->vy+mvb_sz][dp[0].mv->vx-mvb_sz].valid){
-              od_img_draw_line(&_state->vis_img,
-               x0-(mvb_sz<<3),y0+(mvb_sz<<3),x1,y0+(mvb_sz<<3),
-               has_vedge?has_bedge?OD_YCbCr_VBEDGE:
-               OD_YCbCr_VEDGE:OD_YCbCr_BEDGE);
-            }
-            if(dp[0].mv->vx<=(_state->nhmbs+1<<2)-mvb_sz&&
-             _state->mv_grid[dp[0].mv->vy+mvb_sz][dp[0].mv->vx+mvb_sz].valid){
-              od_img_draw_line(&_state->vis_img,
-               x0+(mvb_sz<<3),y0+(mvb_sz<<3),x1,y0+(mvb_sz<<3),
-               has_vedge?has_bedge?OD_YCbCr_VBEDGE:
-               OD_YCbCr_VEDGE:OD_YCbCr_BEDGE);
-            }
-          }
-        }
-      }
-    }
-    memcpy(active_states,prev_active_states,
-     sizeof(active_states[0])*nprev_active_states);
-    nactive_states=nprev_active_states;
-    /*Follow the chain backwards to find the new active states.*/
-    nprev_active_states=0;
-    for(si=0;si<nactive_states;si++){
-      int sj;
-      state=active_states[si];
-      if(state>=dp[0].nstates)state-=dp[0].nstates;
-      state=dp[0].states[state].prevsi;
-      /*Linear search so that each previous state is only recorded once.*/
-      for(sj=0;sj<nprev_active_states&&prev_active_states[sj]!=state;sj++);
-      if(sj>=nprev_active_states){
-        prev_active_states[nprev_active_states++]=state;
-      }
-    }
-  }
-  while((dp--)->states[0].prevsi>=0); /*Tests the node just handled, then steps back.*/
-  /*Now, draw all the candidate MVs in the active trellis paths.
-    These two steps used to be together; now they're apart.
-    Sorry for the mess that caused.*/
-  /*Redraw the MVs, so they appear over the edge labels above.*/
-  od_state_draw_mvs(_state);
-  for(si=0;si<_dp->nstates;si++){
-    prev_active_states[si<<1]=si;
-    prev_active_states[si<<1|1]=si+_dp->nstates;
-  }
-  nprev_active_states=_dp->nstates<<1;
-  nactive_states=0;
-  dp=_dp;
-  do{
-    if(nactive_states>0){
-      x0=(dp[0].mv->vx-2<<3)+(OD_UMV_PADDING<<1); /*NOTE(review): dead store; see below.*/
-      y0=(dp[0].mv->vy-2<<3)+(OD_UMV_PADDING<<1); /*NOTE(review): dead store; see below.*/
-      if(!_has_gap||dp+1!=_dp){
-        x0=(dp[1].mv->vx-2<<3)+(OD_UMV_PADDING<<1); /*Overwrites x0 before any use.*/
-        y0=(dp[1].mv->vy-2<<3)+(OD_UMV_PADDING<<1); /*Overwrites y0 before any use.*/
-        for(si=0;si<nactive_states;si++){
-          state=active_states[si];
-          if(state>=dp[1].nstates)state-=dp[1].nstates;
-          od_img_draw_line(&_state->vis_img,x0,y0,
-           x0+OD_DIV_ROUND_POW2(dp[1].states[state].mv[0],2,2),
-           y0+OD_DIV_ROUND_POW2(dp[1].states[state].mv[1],2,2),
-           OD_YCbCr_MVCAND);
-        }
-      }
-    }
-    memcpy(active_states,prev_active_states,
-     sizeof(active_states[0])*nprev_active_states);
-    nactive_states=nprev_active_states;
-    /*Follow the chain backwards to find the new active states.*/
-    nprev_active_states=0;
-    for(si=0;si<nactive_states;si++){
-      int sj;
-      state=active_states[si];
-      if(state>=dp[0].nstates)state-=dp[0].nstates;
-      state=dp[0].states[state].prevsi;
-      for(sj=0;sj<nprev_active_states&&prev_active_states[sj]!=state;sj++);
-      if(sj>=nprev_active_states){
-        prev_active_states[nprev_active_states++]=state;
-      }
-    }
-  }
-  while((dp--)->states[0].prevsi>=0);
-  /*Draw the first state's MV's.
-    dp now points one before the first node in the chain, hence dp[1].*/
-  x0=(dp[1].mv->vx-2<<3)+(OD_UMV_PADDING<<1);
-  y0=(dp[1].mv->vy-2<<3)+(OD_UMV_PADDING<<1);
-  for(si=0;si<nactive_states;si++){
-    state=active_states[si];
-    if(state>=dp[1].nstates)state-=dp[1].nstates;
-    od_img_draw_line(&_state->vis_img,x0,y0,
-     x0+OD_DIV_ROUND_POW2(dp[1].states[state].mv[0],2,2),
-     y0+OD_DIV_ROUND_POW2(dp[1].states[state].mv[1],2,2),
-     OD_YCbCr_MVCAND);
-  }
-  sprintf(iter_label,"ani%08i",_state->ani_iter++);
-  od_state_dump_img(_state,&_state->vis_img,iter_label);
-}
-#endif
-
-/*Row refinement.*/
-/*Initializes a DP node for row refinement.
-  This saves the vertex's current MV, right edge type and MV rate in the
-   node's original_* fields, collects the still-valid MVs the vertex helps
-   predict, and finds the earliest preceding DP node whose state must be
-   loaded back into the grid (min_predictor_node).
-  _est:     The motion estimation context.
-  _dp:      The DP node to initialize.
-  _vx:      The horizontal position of the vertex.
-  _vy:      The vertical position of the vertex.
-  _prev_dp: The preceding DP node, or NULL if there is none.*/
-static void od_mv_dp_row_init(od_mv_est_ctx *_est,od_mv_dp_node *_dp,
- int _vx,int _vy,od_mv_dp_node *_prev_dp){
-  od_state      *state;
-  int            nhmvbs;
-  int            nvmvbs;
-  state=&_est->enc->state;
-  _dp->mv=_est->mvs[_vy]+_vx;
-  _dp->mvg=state->mv_grid[_vy]+_vx;
-  _dp->original_mv[0]=_dp->mvg->mv[0];
-  _dp->original_mv[1]=_dp->mvg->mv[1];
-  _dp->original_etype=_dp->mvg->right;
-  _dp->original_mv_rate=_dp->mv->mv_rate;
-  nhmvbs=state->nhmbs+1<<2; /*Parsed as (nhmbs+1)<<2.*/
-  nvmvbs=state->nvmbs+1<<2; /*Parsed as (nvmbs+1)<<2.*/
-  if(_vx<2||_vx>nhmvbs-2||_vy<2||_vy>nvmvbs-2){
-    /*Strictly speaking, we may be used to predict others, but since our MV
-       can't possibly change, neither can their rate.*/
-    _dp->npredicted=_dp->npred_changeable=0;
-    /*No one else is used to predict us, or any other MV we predict.
-      However, we may still need to load the previous MV into the grid to
-       estimate our SADs properly.*/
-    _dp->min_predictor_node=_prev_dp;
-  }
-  else{
-    int level;
-    int pred_hist;
-    int npred;
-    int nchangeable;
-    int pi;
-    /*Get the list of MVs we help predict.*/
-    level=OD_MC_LEVEL[_vy&3][_vx&3];
-    /*fprintf(stderr,"Initializing node (%i,%i) [%i,%i] at level %i:\n",
-     _vx,_vy,_vx-2<<2,_vy-2<<2,level);*/
-    npred=nchangeable=0;
-    for(pi=0;pi<OD_NPREDICTED[level];pi++){
-      int px;
-      int py;
-      px=_vx+OD_ROW_PREDICTED[level][pi][0];
-      if(px<2||px>nhmvbs-2)continue;
-      py=_vy+OD_ROW_PREDICTED[level][pi][1];
-      if(py<2||py>nvmvbs-2)continue;
-      if(state->mv_grid[py][px].valid){
-        /*fprintf(stderr,"Adding (%i,%i) as a PREDICTED MV.\n",px,py);*/
-        _dp->predicted_mvgs[npred]=state->mv_grid[py]+px;
-        _dp->predicted_mvs[npred]=_est->mvs[py]+px;
-        /*The changeable offsets come first in each list, so while this test
-           holds npred and nchangeable are equal.*/
-        if(pi<OD_NROW_PRED_CHANGEABLE[level]){
-          /*fprintf(stderr,"It is CHANGEABLE.\n");*/
-          _dp->original_mv_rates[npred]=_est->mvs[py][px].mv_rate;
-          nchangeable++;
-        }
-        npred++;
-      }
-    }
-    _dp->npredicted=npred;
-    _dp->npred_changeable=nchangeable;
-    /*Now, figure out the earliest DP node that influences our own prediction,
-       or that of one of the other MVs we predict.*/
-    pred_hist=OD_ROW_PRED_HIST_SIZE[level];
-    /*fprintf(stderr,"Marking history up to %i back: %i>=%i\n",
-     pred_hist,_prev_dp!=NULL?_prev_dp->mv->vx:-1,_vx-pred_hist);*/
-    if(_prev_dp!=NULL&&_prev_dp->mv->vx>=_vx-pred_hist){
-      od_mv_dp_node *dp_pred;
-      /*Step back until we reach the history limit or a node whose
-         states[0].prevsi is negative.*/
-      for(dp_pred=_prev_dp;dp_pred->mv->vx>_vx-pred_hist&&
-       dp_pred->states[0].prevsi>=0;dp_pred--);
-      /*fprintf(stderr,"Stopped at (%i,%i) (%i<=%i? %i) (%i<0? %i)\n",
-       dp_pred->mv->vx,dp_pred->mv->vy,dp_pred->mv->vx,_vx-pred_hist,
-       dp_pred->mv->vx<=_vx-pred_hist,
-       dp_pred->states[0].prevsi,dp_pred->states[0].prevsi<0);*/
-      if(dp_pred->mv->vx<_vx-pred_hist){dp_pred++;/*fprintf(stderr,"Too far, incrementing to (%i,%i).\n",dp_pred->mv->vx,dp_pred->mv->vy);*/}
-      _dp->min_predictor_node=dp_pred;
-      /*fprintf(stderr,"State will be restored back to (%i,%i).\n",
-       dp_pred->mv->vx,dp_pred->mv->vy);*/
-    }
-    else _dp->min_predictor_node=NULL;
-  }
-}
-/*Fills _dp->blocks with blocks whose origin lies to the left of vertex
-   (_vx,_vy), above and below its row, and stores their count in _dp->nblocks.
-  Which blocks are listed depends on the vertex's level and on which of the
-   surrounding vertices are still valid in the MV grid.
-  Nothing is listed when _vx<=2.
-  NOTE(review): presumably used for the first node of a row's DP chain --
-   confirm against the caller.
-  _est: The motion estimation context.
-  _dp:  The DP node whose block list is filled in.
-  _vx:  The horizontal position of the vertex.
-  _vy:  The vertical position of the vertex.*/
-static void od_mv_dp_first_row_block_setup(od_mv_est_ctx *_est,
- od_mv_dp_node *_dp,int _vx,int _vy){
-  od_state *state;
-  int       nvmvbs;
-  int       level;
-  int       log_mvb_sz;
-  int       mvb_sz;
-  int       nblocks;
-  state=&_est->enc->state;
-  nvmvbs=state->nvmbs+1<<2; /*Parsed as (nvmbs+1)<<2.*/
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1; /*Parsed as (4-level)>>1.*/
-  mvb_sz=1<<log_mvb_sz;
-  nblocks=0;
-  if(_vx>2){
-    if(level>=3){
-      if(_vy>=mvb_sz)_dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-      if(_vy<=nvmvbs-mvb_sz)_dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_sz;
-    }
-    else{
-      int half_mvb_sz;
-      int mvb_off;
-      half_mvb_sz=mvb_sz>>1;
-      if(_vy>=mvb_sz){ /*Blocks above the vertex's row.*/
-        if(state->mv_grid[_vy-half_mvb_sz][_vx-half_mvb_sz].valid){
-          if(level>0||
-           !state->mv_grid[_vy-(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-            mvb_off=half_mvb_sz;
-          }
-          else mvb_off=half_mvb_sz>>1;
-          _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx-mvb_off;
-          if(!state->mv_grid[_vy-mvb_off][_vx].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx-mvb_off;
-          }
-          if(!state->mv_grid[_vy][_vx-mvb_off].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx-(mvb_off<<1);
-          }
-        }
-        else _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-      }
-      if(_vy<=nvmvbs-mvb_sz){ /*Blocks at and below the vertex's row.*/
-        if(state->mv_grid[_vy+half_mvb_sz][_vx-half_mvb_sz].valid){
-          if(level>0||
-           !state->mv_grid[_vy+(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-            mvb_off=half_mvb_sz;
-          }
-          else mvb_off=half_mvb_sz>>1;
-          _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_off;
-          if(!state->mv_grid[_vy+mvb_off][_vx].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_off]+_vx-mvb_off;
-          }
-          if(!state->mv_grid[_vy][_vx-mvb_off].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-(mvb_off<<1);
-          }
-        }
-        else _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_sz;
-      }
-    }
-  }
-  _dp->nblocks=nblocks;
-}
-/*Fills _dp->blocks with blocks whose origin lies to the left of vertex
-   (_vx,_vy), above and below its row, and stores their count in _dp->nblocks.
-  Which blocks are listed depends on the vertex's level, on the level of the
-   vertex one block size to its left, and on which of the surrounding
-   vertices are still valid in the MV grid.
-  NOTE(review): presumably used for nodes that follow another node in a row's
-   DP chain -- confirm against the caller.
-  _est: The motion estimation context.
-  _dp:  The DP node whose block list is filled in.
-  _vx:  The horizontal position of the vertex.
-  _vy:  The vertical position of the vertex.*/
-static void od_mv_dp_prev_row_block_setup(od_mv_est_ctx *_est,
- od_mv_dp_node *_dp,int _vx,int _vy){
-  od_state *state;
-  int       nvmvbs;
-  int       level;
-  int       prev_level;
-  int       log_mvb_sz;
-  int       prev_log_mvb_sz;
-  int       mvb_sz;
-  int       nblocks;
-  state=&_est->enc->state;
-  nvmvbs=state->nvmbs+1<<2; /*Parsed as (nvmbs+1)<<2.*/
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1; /*Parsed as (4-level)>>1.*/
-  mvb_sz=1<<log_mvb_sz;
-  prev_level=OD_MC_LEVEL[_vy&3][_vx-mvb_sz&3]; /*Index is (_vx-mvb_sz)&3.*/
-  prev_log_mvb_sz=4-prev_level>>1; /*Parsed as (4-prev_level)>>1.*/
-  nblocks=0;
-  if(level>=3){
-    if(_vy>=mvb_sz){
-      _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-      if(prev_log_mvb_sz>log_mvb_sz&&
-       !state->mv_grid[_vy-mvb_sz][_vx-mvb_sz].valid){
-        _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_sz<<1)]+_vx-mvb_sz;
-      }
-    }
-    if(_vy<=nvmvbs-mvb_sz){
-      _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_sz;
-      if(prev_log_mvb_sz>log_mvb_sz&&
-       !state->mv_grid[_vy+mvb_sz][_vx-mvb_sz].valid){
-        _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_sz]+_vx-mvb_sz;
-      }
-    }
-  }
-  else{
-    int half_mvb_sz;
-    int mvb_off;
-    half_mvb_sz=mvb_sz>>1;
-    if(_vy>=mvb_sz){ /*Blocks above the vertex's row.*/
-      if(state->mv_grid[_vy-half_mvb_sz][_vx-half_mvb_sz].valid){
-        if(level>0||
-         !state->mv_grid[_vy-(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-          mvb_off=half_mvb_sz;
-        }
-        else mvb_off=half_mvb_sz>>1;
-        _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx-mvb_off;
-        if(!state->mv_grid[_vy-mvb_off][_vx].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx-mvb_off;
-        }
-        if(!state->mv_grid[_vy][_vx-mvb_off].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_off]+_vx-(mvb_off<<1);
-          if(!state->mv_grid[_vy-mvb_off][_vx-(mvb_off<<1)].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_off<<1)]+_vx-(mvb_off<<1);
-          }
-        }
-      }
-      else{
-        _dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx-mvb_sz;
-        if(prev_log_mvb_sz>log_mvb_sz&&
-         !state->mv_grid[_vy-mvb_sz][_vx-mvb_sz].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy-(mvb_sz<<1)]+_vx-mvb_sz;
-        }
-      }
-    }
-    if(_vy<=nvmvbs-mvb_sz){ /*Blocks at and below the vertex's row.*/
-      if(state->mv_grid[_vy+half_mvb_sz][_vx-half_mvb_sz].valid){
-        if(level>0||
-         !state->mv_grid[_vy+(half_mvb_sz>>1)][_vx-(half_mvb_sz>>1)].valid){
-          mvb_off=half_mvb_sz;
-        }
-        else mvb_off=half_mvb_sz>>1;
-        _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_off;
-        if(!state->mv_grid[_vy+mvb_off][_vx].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_off]+_vx-mvb_off;
-        }
-        if(!state->mv_grid[_vy][_vx-mvb_off].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-(mvb_off<<1);
-          if(!state->mv_grid[_vy+mvb_off][_vx-(mvb_off<<1)].valid){
-            _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_off]+_vx-(mvb_off<<1);
-          }
-        }
-      }
-      else{
-        _dp->blocks[nblocks++]=_est->mvs[_vy]+_vx-mvb_sz;
-        if(prev_log_mvb_sz>log_mvb_sz&&
-         !state->mv_grid[_vy+mvb_sz][_vx-mvb_sz].valid){
-          _dp->blocks[nblocks++]=_est->mvs[_vy+mvb_sz]+_vx-mvb_sz;
-        }
-      }
-    }
-  }
-  _dp->nblocks=nblocks;
-}
-
-static void od_mv_dp_last_row_block_setup(od_mv_est_ctx *_est,
- od_mv_dp_node *_dp,int _vx,int _vy){
-  od_state *state;
-  int       nvmvbs;
-  int       level;
-  int       log_mvb_sz;
-  int       mvb_sz;
-  int       nblocks;
-  state=&_est->enc->state;
-  nvmvbs=state->nvmbs+1<<2;
-  level=OD_MC_LEVEL[_vy&3][_vx&3];
-  log_mvb_sz=4-level>>1;
-  mvb_sz=1<<log_mvb_sz;
-  nblocks=0;
-  if(level>=3){
-    if(_vy>=mvb_sz)_dp->blocks[nblocks++]=_est->mvs[_vy-mvb_sz]+_vx;
-    if(_vy<=nvmvbs-mvb_sz)_dp->blocks[nblocks++]=_est->mvs[_vy]+_vx;
-  }