Move zig-zagging from quantization into the fDCT.
[theora.git] / lib / analyze.c
index 842bda4..30b221a 100644 (file)
@@ -667,8 +667,9 @@ static int oc_enc_block_transform_quantize(oc_enc_ctx *_enc,
  oc_enc_pipeline_state *_pipe,int _pli,ptrdiff_t _fragi,
  unsigned _rd_scale,unsigned _rd_iscale,oc_rd_metric *_mo,
  oc_fr_state *_fr,oc_token_checkpoint **_stack){
-  ogg_int16_t            *dct;
   ogg_int16_t            *data;
+  ogg_int16_t            *dct;
+  ogg_int16_t            *idct;
   oc_qii_state            qs;
   const ogg_uint16_t     *dequant;
   ogg_uint16_t            dequant_dc;
@@ -701,6 +702,7 @@ static int oc_enc_block_transform_quantize(oc_enc_ctx *_enc,
   qii=frags[_fragi].qii;
   data=_enc->pipe.dct_data;
   dct=data+64;
+  idct=data+128;
   if(qii&~3){
 #if !defined(OC_COLLECT_METRICS)
     if(_enc->sp_level>=OC_SP_LEVEL_EARLY_SKIP){
@@ -771,12 +773,12 @@ static int oc_enc_block_transform_quantize(oc_enc_ctx *_enc,
   /*Tokenize.*/
   checkpoint=*_stack;
   if(_enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
-    ac_bits=oc_enc_tokenize_ac(_enc,_pli,_fragi,data,dequant,dct,nonzero+1,
-     _stack,OC_RD_ISCALE(_enc->lambda,_rd_iscale),qti?0:3);
+    ac_bits=oc_enc_tokenize_ac(_enc,_pli,_fragi,idct,data,dequant,dct,
+     nonzero+1,_stack,OC_RD_ISCALE(_enc->lambda,_rd_iscale),qti?0:3);
   }
   else{
-    ac_bits=oc_enc_tokenize_ac_fast(_enc,_pli,_fragi,data,dequant,dct,nonzero+1,
-     _stack,OC_RD_ISCALE(_enc->lambda,_rd_iscale),qti?0:3);
+    ac_bits=oc_enc_tokenize_ac_fast(_enc,_pli,_fragi,idct,data,dequant,dct,
+     nonzero+1,_stack,OC_RD_ISCALE(_enc->lambda,_rd_iscale),qti?0:3);
   }
   /*Reconstruct.
     TODO: nonzero may need to be adjusted after tokenization.*/
@@ -798,8 +800,9 @@ static int oc_enc_block_transform_quantize(oc_enc_ctx *_enc,
     else if(qi01>=0)qii=0;
   }
   else{
-    data[0]=dc*dequant_dc;
-    oc_idct8x8(&_enc->state,data,data,nonzero+1);
+    idct[0]=dc*dequant_dc;
+    /*Note: This clears idct[] back to zero for the next block.*/
+    oc_idct8x8(&_enc->state,data,idct,nonzero+1);
   }
   frags[_fragi].qii=qii;
   if(nqis>1){
@@ -1082,7 +1085,7 @@ static void oc_enc_mode_rd_init(oc_enc_ctx *_enc){
         dx=OC_MODE_LOGQ[modeline][pli][qti]-log_plq;
         dq=OC_MODE_LOGQ[modeline][pli][qti]-OC_MODE_LOGQ[modeline+1][pli][qti];
         if(dq==0)dq=1;
-        for(bin=0;bin<OC_SAD_BINS;bin++){
+        for(bin=0;bin<OC_COMP_BINS;bin++){
           int y0;
           int z0;
           int dy;
@@ -1115,15 +1118,15 @@ static unsigned oc_dct_cost2(oc_enc_ctx *_enc,unsigned *_ssd,
   /*SATD metrics for chroma planes vary much less than luma, so we scale them
      by 4 to distribute them into the mode decision bins more evenly.*/
   _satd<<=_pli+1&2;
-  bin=OC_MINI(_satd>>OC_SAD_SHIFT,OC_SAD_BINS-2);
-  dx=_satd-(bin<<OC_SAD_SHIFT);
+  bin=OC_MINI(_satd>>OC_SATD_SHIFT,OC_COMP_BINS-2);
+  dx=_satd-(bin<<OC_SATD_SHIFT);
   y0=_enc->mode_rd[_qii][_pli][_qti][bin].rate;
   z0=_enc->mode_rd[_qii][_pli][_qti][bin].rmse;
   dy=_enc->mode_rd[_qii][_pli][_qti][bin+1].rate-y0;
   dz=_enc->mode_rd[_qii][_pli][_qti][bin+1].rmse-z0;
-  rmse=OC_MAXI(z0+(dz*dx>>OC_SAD_SHIFT),0);
+  rmse=OC_MAXI(z0+(dz*dx>>OC_SATD_SHIFT),0);
   *_ssd=rmse*rmse>>2*OC_RMSE_SCALE-OC_BIT_SCALE;
-  return OC_MAXI(y0+(dy*dx>>OC_SAD_SHIFT),0);
+  return OC_MAXI(y0+(dy*dx>>OC_SATD_SHIFT),0);
 }
 
 /*activity_avg must be positive, or flat regions could get a zero weight, which
@@ -1220,7 +1223,7 @@ static unsigned oc_mb_activity(oc_enc_ctx *_enc,unsigned _mbi,
 }
 
 static unsigned oc_mb_activity_fast(oc_enc_ctx *_enc,unsigned _mbi,
- unsigned _activity[4], unsigned _intra_satd[12]){
+ unsigned _activity[4],const unsigned _intra_satd[12]){
   int bi;
   for(bi=0;bi<4;bi++){
     unsigned act;