1 files changed, 169 insertions, 129 deletions
diff --git a/thirdparty/libtheora/state.c b/thirdparty/libtheora/state.c
index 42ed33a9a3..f4c6240387 100644
--- a/thirdparty/libtheora/state.c
+++ b/thirdparty/libtheora/state.c
@@ -11,25 +11,93 @@
  ********************************************************************
 
   function:
-    last mod: $Id: state.c 16503 2009-08-22 18:14:02Z giles $
+    last mod: $Id$
 
  ********************************************************************/
 
 #include <stdlib.h>
 #include <string.h>
-#include "internal.h"
-#if defined(OC_X86_ASM)
-#if defined(_MSC_VER)
-# include "x86_vc/x86int.h"
-#else
-# include "x86/x86int.h"
-#endif
-#endif
+#include "state.h"
 #if defined(OC_DUMP_IMAGES)
 # include <stdio.h>
 # include "png.h"
+# include "zlib.h"
 #endif
 
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the X and Y directions
+   (4:2:0).
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1])
+   +OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]);
+  dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1])
+   +OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]);
+  _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,2,2),OC_DIV_ROUND_POW2(dy,2,2));
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the Y direction.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[2]);
+  dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[2]);
+  _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
+  dx=OC_MV_X(_lbmvs[1])+OC_MV_X(_lbmvs[3]);
+  dy=OC_MV_Y(_lbmvs[1])+OC_MV_Y(_lbmvs[3]);
+  _cbmvs[1]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the X direction (4:2:2).
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]);
+  dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]);
+  _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
+  dx=OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]);
+  dy=OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]);
+  _cbmvs[2]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with no chroma decimation (4:4:4).
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lmbmv: The luma macro-block level motion vector to fill in for use in
+           prediction.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  _cbmvs[0]=_lbmvs[0];
+  _cbmvs[1]=_lbmvs[1];
+  _cbmvs[2]=_lbmvs[2];
+  _cbmvs[3]=_lbmvs[3];
+}
+
+/*A table of functions used to fill in the chroma plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.*/
+const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
+};
+
+
+
 /*Returns the fragment index of the top-left block in a macro block.
   This can be used to test whether or not the whole macro block is valid.
   _sb_map: The super block map.
@@ -92,7 +160,7 @@ static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],
       if(jmax>4)jmax=4;
       else if(jmax<=0)break;
       /*By default, set all fragment indices to -1.*/
-      memset(_sb_maps[sbi][0],0xFF,sizeof(_sb_maps[sbi]));
+      memset(_sb_maps[sbi],0xFF,sizeof(_sb_maps[sbi]));
       /*Fill in the fragment map for this super block.*/
       xfrag=yfrag+x;
       for(i=0;i<imax;i++){
@@ -186,10 +254,14 @@ static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],
   This version is for use with no chroma decimation (4:4:4).
   This uses the already filled-in luma plane values.
   _mb_map:  The macro block map to fill.
-  _fplanes: The descriptions of the fragment planes.*/
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
 static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],
- const oc_fragment_plane _fplanes[3]){
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
   int k;
+  (void)_xfrag0;
+  (void)_yfrag0;
   for(k=0;k<4;k++){
     _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset;
     _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset;
@@ -211,7 +283,7 @@ static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={
   oc_mb_fill_cmapping00,
   oc_mb_fill_cmapping01,
   oc_mb_fill_cmapping10,
-  (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11
+  oc_mb_fill_cmapping11
 };
 
 /*Fills in the mapping from macro blocks to their corresponding fragment
@@ -469,7 +541,7 @@ static void oc_state_frarray_clear(oc_theora_state *_state){
    unrestricted motion vectors without special casing the boundary.
   If chroma is decimated in either direction, the padding is reduced by a
    factor of 2 on the appropriate sides.
-  _nrefs: The number of reference buffers to init; must be 3 or 4.*/
+  _nrefs: The number of reference buffers to init; must be in the range 3...6.*/
 static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
   th_info       *info;
   unsigned char *ref_frame_data;
@@ -481,6 +553,7 @@ static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
   int            yheight;
   int            chstride;
   int            cheight;
+  ptrdiff_t      align;
   ptrdiff_t      yoffset;
   ptrdiff_t      coffset;
   ptrdiff_t     *frag_buf_offs;
@@ -489,33 +562,38 @@ static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
   int            vdec;
   int            rfi;
   int            pli;
-  if(_nrefs<3||_nrefs>4)return TH_EINVAL;
+  if(_nrefs<3||_nrefs>6)return TH_EINVAL;
   info=&_state->info;
   /*Compute the image buffer parameters for each plane.*/
   hdec=!(info->pixel_fmt&1);
   vdec=!(info->pixel_fmt&2);
   yhstride=info->frame_width+2*OC_UMV_PADDING;
   yheight=info->frame_height+2*OC_UMV_PADDING;
-  chstride=yhstride>>hdec;
+  /*Require 16-byte aligned rows in the chroma planes.*/
+  chstride=(yhstride>>hdec)+15&~15;
   cheight=yheight>>vdec;
   yplane_sz=yhstride*(size_t)yheight;
   cplane_sz=chstride*(size_t)cheight;
   yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
   coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
-  ref_frame_sz=yplane_sz+2*cplane_sz;
+  /*Although we guarantee the rows of the chroma planes are a multiple of 16
+     bytes, the initial padding on the first row may only be 8 bytes.
+    Compute the offset needed to the actual image data to a multiple of 16.*/
+  align=-coffset&15;
+  ref_frame_sz=yplane_sz+2*cplane_sz+16;
   ref_frame_data_sz=_nrefs*ref_frame_sz;
   /*Check for overflow.
     The same caveats apply as for oc_state_frarray_init().*/
-  if(yplane_sz/yhstride!=yheight||2*cplane_sz<cplane_sz||
+  if(yplane_sz/yhstride!=(size_t)yheight||2*cplane_sz+16<cplane_sz||
    ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){
     return TH_EIMPL;
   }
-  ref_frame_data=_ogg_malloc(ref_frame_data_sz);
+  ref_frame_data=oc_aligned_malloc(ref_frame_data_sz,16);
   frag_buf_offs=_state->frag_buf_offs=
    _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
   if(ref_frame_data==NULL||frag_buf_offs==NULL){
     _ogg_free(frag_buf_offs);
-    _ogg_free(ref_frame_data);
+    oc_aligned_free(ref_frame_data);
     return TH_EFAULT;
   }
   /*Set up the width, height and stride for the image buffers.*/
@@ -532,15 +610,15 @@ static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
     memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0],
      sizeof(_state->ref_frame_bufs[0]));
   }
+  _state->ref_frame_handle=ref_frame_data;
   /*Set up the data pointers for the image buffers.*/
   for(rfi=0;rfi<_nrefs;rfi++){
-    _state->ref_frame_data[rfi]=ref_frame_data;
     _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
-    ref_frame_data+=yplane_sz;
+    ref_frame_data+=yplane_sz+align;
     _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
     ref_frame_data+=cplane_sz;
     _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
-    ref_frame_data+=cplane_sz;
+    ref_frame_data+=cplane_sz+(16-align);
     /*Flip the buffer upside down.
       This allows us to decode Theora's bottom-up frames in their natural
        order, yet return a top-down buffer with a positive stride to the user.*/
@@ -550,7 +628,7 @@ static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
   _state->ref_ystride[0]=-yhstride;
   _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride;
   /*Initialize the fragment buffer offsets.*/
-  ref_frame_data=_state->ref_frame_data[0];
+  ref_frame_data=_state->ref_frame_bufs[0][0].data;
   fragi=0;
   for(pli=0;pli<3;pli++){
     th_img_plane      *iplane;
@@ -576,41 +654,44 @@ static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
       vpix+=stride<<3;
     }
   }
-  /*Initialize the reference frame indices.*/
+  /*Initialize the reference frame pointers and indices.*/
   _state->ref_frame_idx[OC_FRAME_GOLD]=
    _state->ref_frame_idx[OC_FRAME_PREV]=
-   _state->ref_frame_idx[OC_FRAME_SELF]=-1;
-  _state->ref_frame_idx[OC_FRAME_IO]=_nrefs>3?3:-1;
+   _state->ref_frame_idx[OC_FRAME_GOLD_ORIG]=
+   _state->ref_frame_idx[OC_FRAME_PREV_ORIG]=
+   _state->ref_frame_idx[OC_FRAME_SELF]=
+   _state->ref_frame_idx[OC_FRAME_IO]=-1;
+  _state->ref_frame_data[OC_FRAME_GOLD]=
+   _state->ref_frame_data[OC_FRAME_PREV]=
+   _state->ref_frame_data[OC_FRAME_GOLD_ORIG]=
+   _state->ref_frame_data[OC_FRAME_PREV_ORIG]=
+   _state->ref_frame_data[OC_FRAME_SELF]=
+   _state->ref_frame_data[OC_FRAME_IO]=NULL;
   return 0;
 }
 
 static void oc_state_ref_bufs_clear(oc_theora_state *_state){
   _ogg_free(_state->frag_buf_offs);
-  _ogg_free(_state->ref_frame_data[0]);
+  oc_aligned_free(_state->ref_frame_handle);
 }
 
 
-void oc_state_vtable_init_c(oc_theora_state *_state){
+void oc_state_accel_init_c(oc_theora_state *_state){
+  _state->cpu_flags=0;
+#if defined(OC_STATE_USE_VTABLE)
   _state->opt_vtable.frag_copy=oc_frag_copy_c;
+  _state->opt_vtable.frag_copy_list=oc_frag_copy_list_c;
   _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
   _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
   _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
   _state->opt_vtable.idct8x8=oc_idct8x8_c;
   _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
-  _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_c;
+  _state->opt_vtable.loop_filter_init=oc_loop_filter_init_c;
   _state->opt_vtable.state_loop_filter_frag_rows=
    oc_state_loop_filter_frag_rows_c;
   _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
-  _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
-}
-
-/*Initialize the accelerated function pointers.*/
-void oc_state_vtable_init(oc_theora_state *_state){
-#if defined(OC_X86_ASM)
-  oc_state_vtable_init_x86(_state);
-#else
-  oc_state_vtable_init_c(_state);
 #endif
+  _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
 }
 
 
@@ -626,7 +707,8 @@ int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
      how it is specified in the bitstream, because the Y axis is flipped in
      the bitstream.
     The displayable frame must fit inside the encoded frame.
-    The color space must be one known by the encoder.*/
+    The color space must be one known by the encoder.
+    The framerate ratio must not contain a zero value.*/
   if((_info->frame_width&0xF)||(_info->frame_height&0xF)||
    _info->frame_width<=0||_info->frame_width>=0x100000||
    _info->frame_height<=0||_info->frame_height>=0x100000||
@@ -639,7 +721,8 @@ int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
       but there are a number of compilers which will mis-optimize this.
      It's better to live with the spurious warnings.*/
    _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES||
-   _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){
+   _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS||
+   _info->fps_numerator<1||_info->fps_denominator<1){
     return TH_EINVAL;
   }
   memset(_state,0,sizeof(*_state));
@@ -648,7 +731,7 @@ int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
      system.*/
   _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
   _state->frame_type=OC_UNKWN_FRAME;
-  oc_state_vtable_init(_state);
+  oc_state_accel_init(_state);
   ret=oc_state_frarray_init(_state);
   if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs);
   if(ret<0){
@@ -758,11 +841,10 @@ void oc_state_borders_fill(oc_theora_state *_state,int _refi){
             _offsets[1] is set if the motion vector has non-zero fractional
              components.
   _pli:     The color plane index.
-  _dx:      The X component of the motion vector.
-  _dy:      The Y component of the motion vector.
+  _mv:      The motion vector.
   Return: The number of offsets returned: 1 or 2.*/
 int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
- int _pli,int _dx,int _dy){
+ int _pli,oc_mv _mv){
   /*Here is a brief description of how Theora handles motion vectors:
     Motion vector components are specified to half-pixel accuracy in
      undecimated directions of each plane, and quarter-pixel accuracy in
@@ -785,21 +867,25 @@ int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
   int xfrac;
   int yfrac;
   int offs;
+  int dx;
+  int dy;
   ystride=_state->ref_ystride[_pli];
   /*These two variables decide whether we are in half- or quarter-pixel
      precision in each component.*/
   xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
   yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
+  dx=OC_MV_X(_mv);
+  dy=OC_MV_Y(_mv);
   /*These two variables are either 0 if all the fractional bits are zero or -1
      if any of them are non-zero.*/
-  xfrac=OC_SIGNMASK(-(_dx&(xprec|1)));
-  yfrac=OC_SIGNMASK(-(_dy&(yprec|1)));
-  offs=(_dx>>xprec)+(_dy>>yprec)*ystride;
+  xfrac=OC_SIGNMASK(-(dx&(xprec|1)));
+  yfrac=OC_SIGNMASK(-(dy&(yprec|1)));
+  offs=(dx>>xprec)+(dy>>yprec)*ystride;
   if(xfrac||yfrac){
     int xmask;
     int ymask;
-    xmask=OC_SIGNMASK(_dx);
-    ymask=OC_SIGNMASK(_dy);
+    xmask=OC_SIGNMASK(dx);
+    ymask=OC_SIGNMASK(dy);
     yfrac&=ystride;
     _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
     _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
@@ -848,13 +934,17 @@ int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
   int mx2;
   int my2;
   int offs;
+  int dx;
+  int dy;
   ystride=_state->ref_ystride[_pli];
   qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
-  my=OC_MVMAP[qpy][_dy+31];
-  my2=OC_MVMAP2[qpy][_dy+31];
+  dx=OC_MV_X(_mv);
+  dy=OC_MV_Y(_mv);
+  my=OC_MVMAP[qpy][dy+31];
+  my2=OC_MVMAP2[qpy][dy+31];
   qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
-  mx=OC_MVMAP[qpx][_dx+31];
-  mx2=OC_MVMAP2[qpx][_dx+31];
+  mx=OC_MVMAP[qpx][dx+31];
+  mx2=OC_MVMAP2[qpx][dx+31];
   offs=my*ystride+mx;
   if(mx2||my2){
     _offsets[1]=offs+my2*ystride+mx2;
@@ -866,18 +956,12 @@ int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
 #endif
 }
 
-void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
-  _state->opt_vtable.state_frag_recon(_state,_fragi,_pli,_dct_coeffs,
-   _last_zzi,_dc_quant);
-}
-
 void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
   unsigned char *dst;
   ptrdiff_t      frag_buf_off;
   int            ystride;
-  int            mb_mode;
+  int            refi;
   /*Apply the inverse transform.*/
   /*Special case only having a DC component.*/
   if(_last_zzi<2){
@@ -887,69 +971,35 @@ void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
        no iDCT rounding.*/
     p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
     /*LOOP VECTORIZES.*/
-    for(ci=0;ci<64;ci++)_dct_coeffs[ci]=p;
+    for(ci=0;ci<64;ci++)_dct_coeffs[64+ci]=p;
   }
   else{
     /*First, dequantize the DC coefficient.*/
     _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
-    oc_idct8x8(_state,_dct_coeffs,_last_zzi);
+    oc_idct8x8(_state,_dct_coeffs+64,_dct_coeffs,_last_zzi);
   }
   /*Fill in the target buffer.*/
   frag_buf_off=_state->frag_buf_offs[_fragi];
-  mb_mode=_state->frags[_fragi].mb_mode;
+  refi=_state->frags[_fragi].refi;
   ystride=_state->ref_ystride[_pli];
-  dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
-  if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs);
+  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
+  if(refi==OC_FRAME_SELF)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs+64);
   else{
     const unsigned char *ref;
     int                  mvoffsets[2];
-    ref=
-     _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
-     +frag_buf_off;
+    ref=_state->ref_frame_data[refi]+frag_buf_off;
     if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
-     _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){
+     _state->frag_mvs[_fragi])>1){
       oc_frag_recon_inter2(_state,
-       dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs);
+       dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs+64);
+    }
+    else{
+      oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
     }
-    else oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs);
-  }
-}
-
-/*Copies the fragments specified by the lists of fragment indices from one
-   frame to another.
-  _fragis:    A pointer to a list of fragment indices.
-  _nfragis:   The number of fragment indices to copy.
-  _dst_frame: The reference frame to copy to.
-  _src_frame: The reference frame to copy from.
-  _pli:       The color plane the fragments lie in.*/
-void oc_state_frag_copy_list(const oc_theora_state *_state,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
- int _dst_frame,int _src_frame,int _pli){
-  _state->opt_vtable.state_frag_copy_list(_state,_fragis,_nfragis,_dst_frame,
-   _src_frame,_pli);
-}
-
-void oc_state_frag_copy_list_c(const oc_theora_state *_state,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
- int _dst_frame,int _src_frame,int _pli){
-  const ptrdiff_t     *frag_buf_offs;
-  const unsigned char *src_frame_data;
-  unsigned char       *dst_frame_data;
-  ptrdiff_t            fragii;
-  int                  ystride;
-  dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]];
-  src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]];
-  ystride=_state->ref_ystride[_pli];
-  frag_buf_offs=_state->frag_buf_offs;
-  for(fragii=0;fragii<_nfragis;fragii++){
-    ptrdiff_t frag_buf_off;
-    frag_buf_off=frag_buf_offs[_fragis[fragii]];
-    oc_frag_copy(_state,dst_frame_data+frag_buf_off,
-     src_frame_data+frag_buf_off,ystride);
   }
 }
 
-static void loop_filter_h(unsigned char *_pix,int _ystride,int *_bv){
+static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){
   int y;
   _pix-=2;
   for(y=0;y<8;y++){
@@ -965,7 +1015,7 @@ static void loop_filter_h(unsigned char *_pix,int _ystride,int *_bv){
   }
 }
 
-static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){
+static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){
   int x;
   _pix-=_ystride*2;
   for(x=0;x<8;x++){
@@ -982,20 +1032,16 @@ static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){
 
 /*Initialize the bounding values array used by the loop filter.
   _bv: Storage for the array.
-  Return: 0 on success, or a non-zero value if no filtering need be applied.*/
-int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){
-  int flimit;
+  _flimit: The filter limit as defined in Section 7.10 of the spec.*/
+void oc_loop_filter_init_c(signed char _bv[256],int _flimit){
   int i;
-  flimit=_state->loop_filter_limits[_state->qis[0]];
-  if(flimit==0)return 1;
   memset(_bv,0,sizeof(_bv[0])*256);
-  for(i=0;i<flimit;i++){
-    if(127-i-flimit>=0)_bv[127-i-flimit]=i-flimit;
-    _bv[127-i]=-i;
-    _bv[127+i]=i;
-    if(127+i+flimit<256)_bv[127+i+flimit]=flimit-i;
+  for(i=0;i<_flimit;i++){
+    if(127-i-_flimit>=0)_bv[127-i-_flimit]=(signed char)(i-_flimit);
+    _bv[127-i]=(signed char)(-i);
+    _bv[127+i]=(signed char)(i);
+    if(127+i+_flimit<256)_bv[127+i+_flimit]=(signed char)(_flimit-i);
   }
-  return 0;
 }
 
 /*Apply the loop filter to a given set of fragment rows in the given plane.
@@ -1006,14 +1052,8 @@ int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){
   _pli:       The color plane to filter.
   _fragy0:    The Y coordinate of the first fragment row to filter.
   _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int _bv[256],
- int _refi,int _pli,int _fragy0,int _fragy_end){
-  _state->opt_vtable.state_loop_filter_frag_rows(_state,_bv,_refi,_pli,
-   _fragy0,_fragy_end);
-}
-
-void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end){
+void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
+ signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){
   const oc_fragment_plane *fplane;
   const oc_fragment       *frags;
   const ptrdiff_t         *frag_buf_offs;
@@ -1030,7 +1070,7 @@ void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv,
   fragi_top=fplane->froffset;
   fragi_bot=fragi_top+fplane->nfrags;
   fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
-  fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
+  fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
   ystride=_state->ref_ystride[_pli];
   frags=_state->frags;
   frag_buf_offs=_state->frag_buf_offs;