summaryrefslogtreecommitdiff
path: root/drivers/theora/mcenc.c
diff options
context:
space:
mode:
authorJuan Linietsky <reduzio@gmail.com>2014-02-09 22:10:30 -0300
committerJuan Linietsky <reduzio@gmail.com>2014-02-09 22:10:30 -0300
commit0b806ee0fc9097fa7bda7ac0109191c9c5e0a1ac (patch)
tree276c4d099e178eb67fbd14f61d77b05e3808e9e3 /drivers/theora/mcenc.c
parent0e49da1687bc8192ed210947da52c9e5c5f301bb (diff)
GODOT IS OPEN SOURCE
Diffstat (limited to 'drivers/theora/mcenc.c')
-rw-r--r--drivers/theora/mcenc.c767
1 files changed, 767 insertions, 0 deletions
diff --git a/drivers/theora/mcenc.c b/drivers/theora/mcenc.c
new file mode 100644
index 0000000000..797e81f4f9
--- /dev/null
+++ b/drivers/theora/mcenc.c
@@ -0,0 +1,767 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include "encint.h"
+
+
+
+typedef struct oc_mcenc_ctx oc_mcenc_ctx;
+
+
+
+/*Temporary state used for motion estimation.
+  It is filled in by oc_mcenc_find_candidates() and read back by
+   oc_mcenc_search_frame().*/
+struct oc_mcenc_ctx{
+ /*The candidate motion vectors, stored as {x,y} pairs.
+   Entry 0 holds the median predictor, entries 1...setb0-1 hold set A, and
+    entries setb0...ncandidates-1 hold set B.
+   oc_mcenc_find_candidates() passes every entry through OC_DIV2() before
+    returning, so consumers see full-pel offsets.
+   NOTE(review): the capacity of 13 is presumably 1 median + 4 neighbors + 3
+    fixed vectors + 5 accelerated predictors; confirm against the limits on
+    ncneighbors and npneighbors in encint.h.*/
+ int candidates[13][2];
+ /*The index of the first set B candidate (one past the end of set A).*/
+ int setb0;
+ /*The total number of valid entries in candidates.*/
+ int ncandidates;
+};
+
+
+
+/*The maximum Y plane SAD value for accepting the median predictor.
+  oc_mcenc_search_frame() only goes on to examine set A when the median
+   predictor's SAD is strictly greater than this.*/
+#define OC_YSAD_THRESH1 (256)
+/*The amount to right shift the minimum error by when inflating it for
+ computing the second maximum Y plane SAD threshold.*/
+#define OC_YSAD_THRESH2_SCALE_BITS (4)
+/*The amount to add to the second maximum Y plane threshold when inflating
+ it.
+  Together with the shift above this gives
+   t2+=(t2>>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET
+   in oc_mcenc_search_frame().*/
+#define OC_YSAD_THRESH2_OFFSET (64)
+
+/*The vector offsets in the X direction for each search site in the square
+ pattern.
+  Sites are numbered 0...8 in raster order, so site 4 is the center (0,0).*/
+static const int OC_SQUARE_DX[9]={-1,0,1,-1,0,1,-1,0,1};
+/*The vector offsets in the Y direction for each search site in the square
+ pattern.*/
+static const int OC_SQUARE_DY[9]={-1,-1,-1,0,0,0,1,1,1};
+/*The number of sites to search for each boundary condition in the square
+ pattern.
+ Bit flags for the boundary conditions are as follows:
+ 1: -16==dx
+ 2: dx==15(.5)
+ 4: -16==dy
+ 8: dy==15(.5)
+ Indices 3 and 7 would need both dx flags set at once, so they hold 0 sites.
+ The table stops at index 10 because index 12 and up would need both dy flags
+  set at once.
+ NOTE(review): oc_mcenc_search_frame() builds these flags from full-pel
+  vectors as OC_DIV16(-v+1) and OC_DIV16(v+1); the -16 bounds above do not
+  match the -15.5 used in the site list comments below, confirm which is
+  current.*/
+static const int OC_SQUARE_NSITES[11]={8,5,5,0,5,3,3,0,5,3,3};
+/*The list of sites to search for each boundary condition in the square
+ pattern.
+ Only the first OC_SQUARE_NSITES[b] entries of row b are meaningful; the
+  center site 4 never appears.*/
+static const int OC_SQUARE_SITES[11][8]={
+ /* -15.5<dx<15(.5), -15.5<dy<15(.5)*/
+ {0,1,2,3,5,6,7,8},
+ /*-15.5==dx, -15.5<dy<15(.5)*/
+ {1,2,5,7,8},
+ /* dx==15(.5), -15.5<dy<15(.5)*/
+ {0,1,3,6,7},
+ /*-15.5==dx==15(.5), -15.5<dy<15(.5) (impossible; placeholder row)*/
+ {-1},
+ /* -15.5<dx<15(.5), -15.5==dy*/
+ {3,5,6,7,8},
+ /*-15.5==dx, -15.5==dy*/
+ {5,7,8},
+ /* dx==15(.5), -15.5==dy*/
+ {3,6,7},
+ /*-15.5==dx==15(.5), -15.5==dy (impossible; placeholder row)*/
+ {-1},
+ /*-15.5<dx<15(.5), dy==15(.5)*/
+ {0,1,2,3,5},
+ /*-15.5==dx, dy==15(.5)*/
+ {1,2,5},
+ /* dx==15(.5), dy==15(.5)*/
+ {0,1,3}
+};
+
+
+/*Builds the list of candidate motion vectors for one macro block.
+  Slot 0 receives the median predictor, followed by set A (the vectors of the
+   already-listed neighbors, the accumulator, the previous vector plus the
+   accumulator, and (0,0)), followed by set B (accelerated predictors).
+  Every vector except (0,0) and the neighbor vectors is clamped to [-31,31]
+   as it is built, and all of them are passed through OC_DIV2() at the end.
+  _enc:   The encoder context; only _enc->mb_info is read.
+  _mcenc: The motion estimation context to fill in.
+  _accum: The MV offset added to the predictors taken from earlier frames.
+  _mbi:   The macro block index.
+  _frame: The reference frame index used to select the analysis MVs.*/
+static void oc_mcenc_find_candidates(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
+ int _accum[2],int _mbi,int _frame){
+  oc_mb_enc_info  *embs;
+  oc_mb_enc_info  *emb;
+  int            (*cands)[2];
+  int              accel[2];
+  int              nc;
+  int              c;
+  int              i;
+  embs=_enc->mb_info;
+  emb=embs+_mbi;
+  cands=_mcenc->candidates;
+  /*Slot 0 is reserved for the median predictor, so set A starts at slot 1.*/
+  nc=1;
+  /*Set A, part 1: the current-frame vectors of the adjacent macro blocks.*/
+  for(i=0;i<emb->ncneighbors;i++){
+    unsigned nmbi;
+    nmbi=emb->cneighbors[i];
+    for(c=0;c<2;c++)cands[nc][c]=embs[nmbi].analysis_mv[0][_frame][c];
+    nc++;
+  }
+  /*Set A, part 2: the accumulator alone, the previous frame's vector plus the
+     accumulator, and the (0,0) vector, in that order.*/
+  for(c=0;c<2;c++){
+    cands[nc][c]=OC_CLAMPI(-31,_accum[c],31);
+    cands[nc+1][c]=OC_CLAMPI(-31,
+     emb->analysis_mv[1][_frame][c]+_accum[c],31);
+    cands[nc+2][c]=0;
+  }
+  nc+=3;
+  /*The best predictor is the component-wise median of the first three
+     vectors of set A (slots 1, 2 and 3).
+    Three compare-and-swap steps leave the median in the middle variable.*/
+  for(c=0;c<2;c++){
+    int lo;
+    int mid;
+    int hi;
+    lo=cands[1][c];
+    mid=cands[2][c];
+    hi=cands[3][c];
+    OC_SORT2I(lo,mid);
+    OC_SORT2I(mid,hi);
+    OC_SORT2I(lo,mid);
+    cands[0][c]=mid;
+  }
+  /*Set B: accelerated predictors, extrapolated from the two previous
+     vectors.
+    NOTE(review): although set B is described as covering this and the
+     adjacent macro blocks, every entry is computed from this macro block's
+     own vectors, so all npneighbors+1 entries are identical and pneighbors[]
+     is never consulted.
+    Confirm whether that is intended before changing it.*/
+  _mcenc->setb0=nc;
+  for(c=0;c<2;c++){
+    accel[c]=OC_CLAMPI(-31,
+     2*emb->analysis_mv[1][_frame][c]
+     -emb->analysis_mv[2][_frame][c]+_accum[c],31);
+  }
+  i=0;
+  do{
+    cands[nc][0]=accel[0];
+    cands[nc][1]=accel[1];
+    nc++;
+  }
+  while(i++<emb->npneighbors);
+  /*Truncate to full-pel positions.*/
+  for(i=0;i<nc;i++){
+    for(c=0;c<2;c++)cands[i][c]=OC_DIV2(cands[i][c]);
+  }
+  _mcenc->ncandidates=nc;
+}
+
+#if 0 /*Compiled out: in the visible code oc_sad16_halfpel() is only
+  referenced from another #if 0 region.
+  It sums oc_enc_frag_sad2_thresh() over the four fragments listed in _fragis,
+   passing _best_err minus the error accumulated so far as each call's
+   threshold.*/
+static unsigned oc_sad16_halfpel(const oc_enc_ctx *_enc,
+ const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
+ int _mvoffset0,int _mvoffset1,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _best_err){
+ unsigned err;
+ int bi;
+ err=0;
+ for(bi=0;bi<4;bi++){
+ ptrdiff_t frag_offs;
+ frag_offs=_frag_buf_offs[_fragis[bi]];
+ err+=oc_enc_frag_sad2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0,
+ _ref+frag_offs+_mvoffset1,_ystride,_best_err-err);
+ }
+ return err;
+}
+#endif
+
+/*Sums oc_enc_frag_satd2_thresh() over the four fragments of a macro block.
+  Each fragment is compared against the two reference positions selected by
+   _mvoffset0 and _mvoffset1.
+  _frag_buf_offs: The buffer offset of every fragment.
+  _fragis:        The indices of the four fragments to measure.
+  _mvoffset0:     The offset of the first reference position.
+  _mvoffset1:     The offset of the second reference position.
+  _src:           The source frame data.
+  _ref:           The reference frame data.
+  _ystride:       The stride passed through to the per-fragment metric.
+  _best_err:      The best error so far; the remainder after subtracting the
+                   running total is passed as each fragment's threshold.
+  Return: The accumulated error.*/
+static unsigned oc_satd16_halfpel(const oc_enc_ctx *_enc,
+ const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
+ int _mvoffset0,int _mvoffset1,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _best_err){
+  unsigned total;
+  int      bi;
+  total=0;
+  for(bi=0;bi<4;bi++){
+    const unsigned char *frag_src;
+    const unsigned char *frag_ref;
+    ptrdiff_t            offs;
+    offs=_frag_buf_offs[_fragis[bi]];
+    frag_src=_src+offs;
+    frag_ref=_ref+offs;
+    total+=oc_enc_frag_satd2_thresh(_enc,frag_src,
+     frag_ref+_mvoffset0,frag_ref+_mvoffset1,_ystride,_best_err-total);
+  }
+  return total;
+}
+
+/*Measures the SAD of a macro block at one full-pel candidate vector.
+  _frag_buf_offs: The buffer offset of every fragment.
+  _fragis:        The indices of the macro block's four fragments.
+  _dx:            The horizontal offset of the candidate.
+  _dy:            The vertical offset of the candidate.
+  _src:           The source frame data.
+  _ref:           The reference frame data.
+  _ystride:       The stride used to turn _dy into a buffer offset.
+  _block_err:     Receives the SAD of each of the four fragments.
+  Return: The sum of the four fragment SADs.*/
+static unsigned oc_mcenc_ysad_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
+ const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _block_err[4]){
+  unsigned total;
+  int      mvoffset;
+  int      bi;
+  mvoffset=_dx+_dy*_ystride;
+  total=0;
+  for(bi=0;bi<4;bi++){
+    ptrdiff_t offs;
+    offs=_frag_buf_offs[_fragis[bi]];
+    _block_err[bi]=oc_enc_frag_sad(_enc,_src+offs,_ref+offs+mvoffset,
+     _ystride);
+    total+=_block_err[bi];
+  }
+  return total;
+}
+
+/*Measures the SATD of a macro block at one full-pel candidate vector.
+  No early termination is requested: UINT_MAX is passed as the threshold.
+  _frag_buf_offs: The buffer offset of every fragment.
+  _fragis:        The indices of the macro block's four fragments.
+  _dx:            The horizontal offset of the candidate.
+  _dy:            The vertical offset of the candidate.
+  _src:           The source frame data.
+  _ref:           The reference frame data.
+  _ystride:       The stride used to turn _dy into a buffer offset.
+  Return: The sum of the four fragment SATDs.*/
+static int oc_mcenc_ysatd_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
+ const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride){
+  int total;
+  int mvoffset;
+  int bi;
+  mvoffset=_dx+_dy*_ystride;
+  total=0;
+  for(bi=0;bi<4;bi++){
+    ptrdiff_t offs;
+    offs=_frag_buf_offs[_fragis[bi]];
+    total+=oc_enc_frag_satd_thresh(_enc,_src+offs,_ref+offs+mvoffset,
+     _ystride,UINT_MAX);
+  }
+  return total;
+}
+
+/*Measures the SATD of a single fragment at one full-pel candidate vector.
+  No early termination is requested: UINT_MAX is passed as the threshold.
+  _frag_offs: The buffer offset of the fragment.
+  _dx:        The horizontal offset of the candidate.
+  _dy:        The vertical offset of the candidate.
+  _src:       The source frame data.
+  _ref:       The reference frame data.
+  _ystride:   The stride used to turn _dy into a buffer offset.
+  Return: The fragment's SATD.*/
+static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc,
+ ptrdiff_t _frag_offs,int _dx,int _dy,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride){
+  const unsigned char *frag_src;
+  const unsigned char *frag_ref;
+  frag_src=_src+_frag_offs;
+  frag_ref=_ref+_frag_offs+_dx+_dy*_ystride;
+  return oc_enc_frag_satd_thresh(_enc,frag_src,frag_ref,_ystride,UINT_MAX);
+}
+
+/*Perform a motion vector search for this macro block against a single
+   reference frame.
+  As a bonus, individual block motion vectors are computed as well, as much of
+   the work can be shared.
+  The actual motion vector is stored in the appropriate place in the
+   oc_mb_enc_info structure.
+  The search runs in stages, and each later stage is skipped once the best
+   SAD is at or below the stage's threshold: the median predictor, the set A
+   candidates, the set B candidates, a square pattern descent for the whole
+   macro block, and finally (OC_FRAME_PREV only) a square pattern descent for
+   each individual block.
+  On return, for macro block _mbi, error[_frame] holds the best full-pel SAD
+   (truncated to ogg_uint16_t), satd[_frame] holds the SATD at the best
+   vector, and analysis_mv[0][_frame] holds the best vector doubled.
+  For OC_FRAME_PREV, block_satd[] and block_mv[] are filled in the same way
+   for each of the four blocks.
+  _enc:   The encoder context.
+  _accum: Drop frame/golden MV accumulators.
+  _mbi:   The macro block index.
+  _frame: The frame to search, either OC_FRAME_PREV or OC_FRAME_GOLD.*/
+void oc_mcenc_search_frame(oc_enc_ctx *_enc,int _accum[2],int _mbi,int _frame){
+  /*Note: Traditionally this search is done using a rate-distortion objective
+     function of the form D+lambda*R.
+    However, xiphmont tested this and found it produced a small degradation,
+     while requiring extra computation.
+    This is most likely due to Theora's peculiar MV encoding scheme: MVs are
+     not coded relative to a predictor, and the only truly cheap way to use a
+     MV is in the LAST or LAST2 MB modes, which are not being considered here.
+    Therefore if we use the MV found here, it's only because both LAST and
+     LAST2 performed poorly, and therefore the MB is not likely to be uniform
+     or suffer from the aperture problem.
+    Furthermore we would like to re-use the MV found here for as many MBs as
+     possible, so picking a slightly sub-optimal vector to save a bit or two
+     may cause increased degradation in many blocks to come.
+    We could artificially reduce lambda to compensate, but it's faster to just
+     disable it entirely, and use D (the distortion) as the sole criterion.*/
+  oc_mcenc_ctx         mcenc;
+  const ptrdiff_t     *frag_buf_offs;
+  const ptrdiff_t     *fragis;
+  const unsigned char *src;
+  const unsigned char *ref;
+  int                  ystride;
+  oc_mb_enc_info      *embs;
+  /*One row per vertical offset (indexed by candy+15), with one bit per
+     horizontal offset (bit candx+15).
+    This assumes every full-pel vector lies in [-15,15]; the candidates are
+     clamped to [-31,31] and then halved, and the boundary flags keep the
+     pattern searches from stepping outside that range.*/
+  ogg_int32_t          hit_cache[31];
+  ogg_int32_t          hitbit;
+  /*The per-block bests are only initialized and used for OC_FRAME_PREV.*/
+  unsigned             best_block_err[4];
+  unsigned             block_err[4];
+  unsigned             best_err;
+  int                  best_vec[2];
+  int                  best_block_vec[4][2];
+  int                  candx;
+  int                  candy;
+  int                  bi;
+  embs=_enc->mb_info;
+  /*Find some candidate motion vectors.*/
+  oc_mcenc_find_candidates(_enc,&mcenc,_accum,_mbi,_frame);
+  /*Clear the cache of locations we've examined.*/
+  memset(hit_cache,0,sizeof(hit_cache));
+  /*Start with the median predictor.*/
+  candx=mcenc.candidates[0][0];
+  candy=mcenc.candidates[0][1];
+  hit_cache[candy+15]|=(ogg_int32_t)1<<(candx+15);
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  fragis=_enc->state.mb_maps[_mbi][0];
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
+  ystride=_enc->state.ref_ystride[0];
+  /*TODO: customize error function for speed/(quality+size) tradeoff.*/
+  best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+  best_vec[0]=candx;
+  best_vec[1]=candy;
+  if(_frame==OC_FRAME_PREV){
+    for(bi=0;bi<4;bi++){
+      best_block_err[bi]=block_err[bi];
+      best_block_vec[bi][0]=candx;
+      best_block_vec[bi][1]=candy;
+    }
+  }
+  /*If this predictor fails, move on to set A.*/
+  if(best_err>OC_YSAD_THRESH1){
+    unsigned err;
+    unsigned t2;
+    int      ncs;
+    int      ci;
+    /*Compute the early termination threshold for set A.
+      It is the largest stored error among this macro block and up to three
+       of its neighbors, inflated by a fraction of itself plus a constant.*/
+    t2=embs[_mbi].error[_frame];
+    ncs=OC_MINI(3,embs[_mbi].ncneighbors);
+    for(ci=0;ci<ncs;ci++){
+      t2=OC_MAXI(t2,embs[embs[_mbi].cneighbors[ci]].error[_frame]);
+    }
+    t2+=(t2>>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET;
+    /*Examine the candidates in set A.*/
+    for(ci=1;ci<mcenc.setb0;ci++){
+      candx=mcenc.candidates[ci][0];
+      candy=mcenc.candidates[ci][1];
+      /*If we've already examined this vector, then we would be using it if it
+         was better than what we are using.*/
+      hitbit=(ogg_int32_t)1<<(candx+15);
+      if(hit_cache[candy+15]&hitbit)continue;
+      hit_cache[candy+15]|=hitbit;
+      err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+       frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+      if(err<best_err){
+        best_err=err;
+        best_vec[0]=candx;
+        best_vec[1]=candy;
+      }
+      if(_frame==OC_FRAME_PREV){
+        for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
+          best_block_err[bi]=block_err[bi];
+          best_block_vec[bi][0]=candx;
+          best_block_vec[bi][1]=candy;
+        }
+      }
+    }
+    if(best_err>t2){
+      /*Examine the candidates in set B.
+        ci continues from where the set A loop stopped, i.e., at setb0.*/
+      for(;ci<mcenc.ncandidates;ci++){
+        candx=mcenc.candidates[ci][0];
+        candy=mcenc.candidates[ci][1];
+        hitbit=(ogg_int32_t)1<<(candx+15);
+        if(hit_cache[candy+15]&hitbit)continue;
+        hit_cache[candy+15]|=hitbit;
+        err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+         frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+        if(err<best_err){
+          best_err=err;
+          best_vec[0]=candx;
+          best_vec[1]=candy;
+        }
+        if(_frame==OC_FRAME_PREV){
+          for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
+            best_block_err[bi]=block_err[bi];
+            best_block_vec[bi][0]=candx;
+            best_block_vec[bi][1]=candy;
+          }
+        }
+      }
+      /*Use the same threshold for set B as in set A.*/
+      if(best_err>t2){
+        int best_site;
+        int nsites;
+        int sitei;
+        int site;
+        int b;
+        /*Square pattern search.
+          Each pass examines the unvisited neighbors of best_vec and then
+           moves to the best improving one; it stops when no neighbor
+           improves on the current best (best_site stays at the center).*/
+        for(;;){
+          best_site=4;
+          /*Compose the bit flags for boundary conditions.*/
+          b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1|
+           OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3;
+          nsites=OC_SQUARE_NSITES[b];
+          for(sitei=0;sitei<nsites;sitei++){
+            site=OC_SQUARE_SITES[b][sitei];
+            candx=best_vec[0]+OC_SQUARE_DX[site];
+            candy=best_vec[1]+OC_SQUARE_DY[site];
+            hitbit=(ogg_int32_t)1<<(candx+15);
+            if(hit_cache[candy+15]&hitbit)continue;
+            hit_cache[candy+15]|=hitbit;
+            err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+             frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+            if(err<best_err){
+              best_err=err;
+              best_site=site;
+            }
+            if(_frame==OC_FRAME_PREV){
+              for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
+                best_block_err[bi]=block_err[bi];
+                best_block_vec[bi][0]=candx;
+                best_block_vec[bi][1]=candy;
+              }
+            }
+          }
+          if(best_site==4)break;
+          best_vec[0]+=OC_SQUARE_DX[best_site];
+          best_vec[1]+=OC_SQUARE_DY[best_site];
+        }
+        /*Final 4-MV search.*/
+        /*Simply use 1/4 of the macro block set A and B threshold as the
+           individual block threshold.*/
+        if(_frame==OC_FRAME_PREV){
+          t2>>=2;
+          for(bi=0;bi<4;bi++){
+            if(best_block_err[bi]>t2){
+              /*Square pattern search.
+                We do this in a slightly interesting manner.
+                We continue to check the SAD of all four blocks in the
+                 macro block.
+                This gives us two things:
+                 1) We can continue to use the hit_cache to avoid duplicate
+                     checks.
+                    Otherwise we could continue to read it, but not write to it
+                     without saving and restoring it for each block.
+                    Note that we could still eliminate a large number of
+                     duplicate checks by taking into account the site we came
+                     from when choosing the site list.
+                    We can still do that to avoid extra hit_cache queries, and
+                     it might even be a speed win.
+                 2) It gives us a slightly better chance of escaping local
+                     minima.
+                    We would not be here if we weren't doing a fairly bad job
+                     in finding a good vector, and checking these vectors can
+                     save us from 100 to several thousand points off our SAD 1
+                     in 15 times.
+                TODO: Is this a good idea?
+                Who knows.
+                It needs more testing.*/
+              for(;;){
+                int bestx;
+                int besty;
+                int bj;
+                bestx=best_block_vec[bi][0];
+                besty=best_block_vec[bi][1];
+                /*Compose the bit flags for boundary conditions.*/
+                b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1|
+                 OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3;
+                nsites=OC_SQUARE_NSITES[b];
+                for(sitei=0;sitei<nsites;sitei++){
+                  site=OC_SQUARE_SITES[b][sitei];
+                  candx=bestx+OC_SQUARE_DX[site];
+                  candy=besty+OC_SQUARE_DY[site];
+                  hitbit=(ogg_int32_t)1<<(candx+15);
+                  if(hit_cache[candy+15]&hitbit)continue;
+                  hit_cache[candy+15]|=hitbit;
+                  err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+                   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+                  if(err<best_err){
+                    best_err=err;
+                    best_vec[0]=candx;
+                    best_vec[1]=candy;
+                  }
+                  for(bj=0;bj<4;bj++)if(block_err[bj]<best_block_err[bj]){
+                    best_block_err[bj]=block_err[bj];
+                    best_block_vec[bj][0]=candx;
+                    best_block_vec[bj][1]=candy;
+                  }
+                }
+                /*Stop once a full pass leaves this block's vector unchanged.*/
+                if(best_block_vec[bi][0]==bestx&&best_block_vec[bi][1]==besty){
+                  break;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  embs[_mbi].error[_frame]=(ogg_uint16_t)best_err;
+  candx=best_vec[0];
+  candy=best_vec[1];
+  embs[_mbi].satd[_frame]=oc_mcenc_ysatd_check_mbcandidate_fullpel(_enc,
+   frag_buf_offs,fragis,candx,candy,src,ref,ystride);
+  /*Store the vector doubled.
+    Multiply rather than shift: the components can be negative, and left
+     shifting a negative value is undefined behavior in C.*/
+  embs[_mbi].analysis_mv[0][_frame][0]=(signed char)(candx*2);
+  embs[_mbi].analysis_mv[0][_frame][1]=(signed char)(candy*2);
+  if(_frame==OC_FRAME_PREV){
+    for(bi=0;bi<4;bi++){
+      candx=best_block_vec[bi][0];
+      candy=best_block_vec[bi][1];
+      embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_check_bcandidate_fullpel(_enc,
+       frag_buf_offs[fragis[bi]],candx,candy,src,ref,ystride);
+      embs[_mbi].block_mv[bi][0]=(signed char)(candx*2);
+      embs[_mbi].block_mv[bi][1]=(signed char)(candy*2);
+    }
+  }
+}
+
+/*Runs the motion search for one macro block against both the previous and
+   the golden reference frames.
+  Before each search the stored analysis MVs are shifted back one frame and
+   rewritten into the relative form the predictor calculation expects; the
+   golden MVs are restored to absolute form afterwards.
+  _enc: The encoder context.
+  _mbi: The macro block index.*/
+void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi){
+  oc_mv2 *mvs;
+  int     accum_p[2];
+  int     accum_g[2];
+  int     c;
+  mvs=_enc->mb_info[_mbi].analysis_mv;
+  for(c=0;c<2;c++){
+    /*The previous-frame accumulator is only non-zero after a dropped frame.*/
+    accum_p[c]=_enc->prevframe_dropped?mvs[0][OC_FRAME_PREV][c]:0;
+    accum_g[c]=mvs[2][OC_FRAME_GOLD][c];
+    mvs[0][OC_FRAME_PREV][c]-=mvs[2][OC_FRAME_PREV][c];
+  }
+  /*Move the motion vector predictors back a frame.*/
+  memmove(mvs+1,mvs,2*sizeof(*mvs));
+  /*Search the last frame.*/
+  oc_mcenc_search_frame(_enc,accum_p,_mbi,OC_FRAME_PREV);
+  for(c=0;c<2;c++){
+    mvs[2][OC_FRAME_PREV][c]=accum_p[c];
+    /*GOLDEN MVs are different from PREV MVs in that they're each absolute
+       offsets from some frame in the past rather than relative offsets from
+       the frame before.
+      For predictor calculation to make sense, we need them to be in the same
+       form as PREV MVs.*/
+    mvs[1][OC_FRAME_GOLD][c]-=mvs[2][OC_FRAME_GOLD][c];
+    mvs[2][OC_FRAME_GOLD][c]-=accum_g[c];
+  }
+  /*Search the golden frame.*/
+  oc_mcenc_search_frame(_enc,accum_g,_mbi,OC_FRAME_GOLD);
+  /*Put GOLDEN MVs back into absolute offset form.
+    The newest MV is already an absolute offset.*/
+  for(c=0;c<2;c++){
+    mvs[2][OC_FRAME_GOLD][c]+=accum_g[c];
+    mvs[1][OC_FRAME_GOLD][c]+=mvs[2][OC_FRAME_GOLD][c];
+  }
+}
+
+#if 0
+/*Compiled out: the SAD-based counterpart of
+   oc_mcenc_ysatd_halfpel_mbrefine().
+  Refines a full-pel macro block vector by testing the 8 surrounding half-pel
+   sites.
+  _enc:      The encoder context.
+  _mbi:      The macro block index.
+  _vec:      On input, the full-pel vector; on output, that vector doubled
+              plus the offset of the best site.
+  _best_err: The error of the full-pel vector.
+  _frame:    The reference frame to search.
+  Return: The best error found.*/
+static int oc_mcenc_ysad_halfpel_mbrefine(const oc_enc_ctx *_enc,int _mbi,
+ int _vec[2],int _best_err,int _frame){
+  const unsigned char *src;
+  const unsigned char *ref;
+  const ptrdiff_t     *frag_buf_offs;
+  const ptrdiff_t     *fragis;
+  int                  offset_y[9];
+  int                  ystride;
+  int                  mvoffset_base;
+  int                  best_site;
+  int                  sitei;
+  int                  err;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  /*This used to index with an undeclared "_framei", which would not compile
+     if this region were ever re-enabled.*/
+  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  fragis=_enc->state.mb_maps[_mbi][0];
+  ystride=_enc->state.ref_ystride[0];
+  mvoffset_base=_vec[0]+_vec[1]*ystride;
+  /*offset_y[4] (the center site) is left unset; the loop never visits it.*/
+  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
+  offset_y[3]=offset_y[5]=0;
+  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
+  best_site=4;
+  for(sitei=0;sitei<8;sitei++){
+    int site;
+    int xmask;
+    int ymask;
+    int dx;
+    int dy;
+    int mvoffset0;
+    int mvoffset1;
+    site=OC_SQUARE_SITES[0][sitei];
+    dx=OC_SQUARE_DX[site];
+    dy=OC_SQUARE_DY[site];
+    /*The following code SHOULD be equivalent to
+       oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
+       (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
+      However, it should also be much faster, as it involves no multiplies and
+       doesn't have to handle chroma vectors.
+      The vector is doubled with *2 rather than <<1 because left shifting a
+       negative value is undefined behavior in C.*/
+    xmask=OC_SIGNMASK(((_vec[0]*2)+dx)^dx);
+    ymask=OC_SIGNMASK(((_vec[1]*2)+dy)^dy);
+    mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
+    mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
+    err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis,
+     mvoffset0,mvoffset1,src,ref,ystride,_best_err);
+    if(err<_best_err){
+      _best_err=err;
+      best_site=site;
+    }
+  }
+  _vec[0]=(_vec[0]*2)+OC_SQUARE_DX[best_site];
+  _vec[1]=(_vec[1]*2)+OC_SQUARE_DY[best_site];
+  return _best_err;
+}
+#endif
+
+/*Refines a full-pel macro block vector by testing the 8 surrounding half-pel
+   sites with the SATD metric.
+  _enc:      The encoder context.
+  _mbi:      The macro block index.
+  _vec:      On input, the full-pel vector; on output, that vector doubled
+              plus the offset of the best site (unchanged offset of (0,0) if
+              no site improved on _best_err).
+  _best_err: The error of the full-pel vector.
+  _frame:    The reference frame to search.
+  Return: The best error found.*/
+static unsigned oc_mcenc_ysatd_halfpel_mbrefine(const oc_enc_ctx *_enc,
+ int _mbi,int _vec[2],unsigned _best_err,int _frame){
+  const unsigned char *src;
+  const unsigned char *ref;
+  const ptrdiff_t     *frag_buf_offs;
+  const ptrdiff_t     *fragis;
+  int                  offset_y[9];
+  int                  ystride;
+  int                  mvoffset_base;
+  int                  best_site;
+  int                  sitei;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  fragis=_enc->state.mb_maps[_mbi][0];
+  ystride=_enc->state.ref_ystride[0];
+  mvoffset_base=_vec[0]+_vec[1]*ystride;
+  /*offset_y[4] (the center site) is left unset; the loop never visits it.*/
+  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
+  offset_y[3]=offset_y[5]=0;
+  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
+  best_site=4;
+  for(sitei=0;sitei<8;sitei++){
+    /*Unsigned, to match both the helper's return type and _best_err.*/
+    unsigned err;
+    int      site;
+    int      xmask;
+    int      ymask;
+    int      dx;
+    int      dy;
+    int      mvoffset0;
+    int      mvoffset1;
+    site=OC_SQUARE_SITES[0][sitei];
+    dx=OC_SQUARE_DX[site];
+    dy=OC_SQUARE_DY[site];
+    /*The following code SHOULD be equivalent to
+       oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
+       (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
+      However, it should also be much faster, as it involves no multiplies and
+       doesn't have to handle chroma vectors.
+      The vector is doubled with *2 rather than <<1 because left shifting a
+       negative value is undefined behavior in C.*/
+    xmask=OC_SIGNMASK(((_vec[0]*2)+dx)^dx);
+    ymask=OC_SIGNMASK(((_vec[1]*2)+dy)^dy);
+    mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
+    mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
+    err=oc_satd16_halfpel(_enc,frag_buf_offs,fragis,
+     mvoffset0,mvoffset1,src,ref,ystride,_best_err);
+    if(err<_best_err){
+      _best_err=err;
+      best_site=site;
+    }
+  }
+  _vec[0]=(_vec[0]*2)+OC_SQUARE_DX[best_site];
+  _vec[1]=(_vec[1]*2)+OC_SQUARE_DY[best_site];
+  return _best_err;
+}
+
+/*Refines the stored whole-macro-block vector for one reference frame.
+  The stored vector is passed through OC_DIV2(), refined by
+   oc_mcenc_ysatd_halfpel_mbrefine(), and written back together with the
+   updated SATD.
+  _enc:   The encoder context.
+  _mbi:   The macro block index.
+  _frame: The reference frame whose vector is refined.*/
+void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame){
+  oc_mb_enc_info *emb;
+  int             mv[2];
+  int             c;
+  emb=_enc->mb_info+_mbi;
+  for(c=0;c<2;c++)mv[c]=OC_DIV2(emb->analysis_mv[0][_frame][c]);
+  emb->satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc,
+   _mbi,mv,emb->satd[_frame],_frame);
+  for(c=0;c<2;c++)emb->analysis_mv[0][_frame][c]=(signed char)mv[c];
+}
+
+#if 0
+/*Compiled out: the SAD-based counterpart of
+   oc_mcenc_ysatd_halfpel_brefine().
+  Refines a full-pel block vector by testing the 8 surrounding half-pel sites.
+  _enc:      The encoder context.
+  _vec:      On input, the full-pel vector; on output, that vector doubled
+              plus the offset of the best site.
+  _src:      The source data for this block.
+  _ref:      The reference data for this block.
+  _ystride:  The stride passed through to the per-fragment metric.
+  _offset_y: The vertical buffer offset of each of the 9 sites.
+  _best_err: The error of the full-pel vector.
+  Return: The best error found.*/
+static unsigned oc_mcenc_ysad_halfpel_brefine(const oc_enc_ctx *_enc,
+ int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ int _offset_y[9],unsigned _best_err){
+  int mvoffset_base;
+  int best_site;
+  int sitei;
+  mvoffset_base=_vec[0]+_vec[1]*_ystride;
+  best_site=4;
+  for(sitei=0;sitei<8;sitei++){
+    unsigned err;
+    int      site;
+    int      xmask;
+    int      ymask;
+    int      dx;
+    int      dy;
+    int      mvoffset0;
+    int      mvoffset1;
+    site=OC_SQUARE_SITES[0][sitei];
+    dx=OC_SQUARE_DX[site];
+    dy=OC_SQUARE_DY[site];
+    /*The following code SHOULD be equivalent to
+       oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
+       (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
+      However, it should also be much faster, as it involves no multiplies and
+       doesn't have to handle chroma vectors.
+      The vector is doubled with *2 rather than <<1 because left shifting a
+       negative value is undefined behavior in C.*/
+    xmask=OC_SIGNMASK(((_vec[0]*2)+dx)^dx);
+    ymask=OC_SIGNMASK(((_vec[1]*2)+dy)^dy);
+    mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
+    mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
+    /*The stride parameter is _ystride; a bare "ystride" is not declared
+       here and would not compile if this region were re-enabled.*/
+    err=oc_enc_frag_sad2_thresh(_enc,_src,
+     _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err);
+    if(err<_best_err){
+      _best_err=err;
+      best_site=site;
+    }
+  }
+  _vec[0]=(_vec[0]*2)+OC_SQUARE_DX[best_site];
+  _vec[1]=(_vec[1]*2)+OC_SQUARE_DY[best_site];
+  return _best_err;
+}
+#endif
+
+/*Refines a full-pel block vector by testing the 8 surrounding half-pel sites
+   with the SATD metric.
+  _enc:      The encoder context.
+  _vec:      On input, the full-pel vector; on output, that vector doubled
+              plus the offset of the best site (an offset of (0,0) if no site
+              improved on _best_err).
+  _src:      The source data for this block.
+  _ref:      The reference data for this block.
+  _ystride:  The stride used to turn the vertical component into an offset.
+  _offset_y: The vertical buffer offset of each of the 9 sites; the entry for
+              the center site 4 is never read.
+  _best_err: The error of the full-pel vector.
+  Return: The best error found.*/
+static unsigned oc_mcenc_ysatd_halfpel_brefine(const oc_enc_ctx *_enc,
+ int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ int _offset_y[9],unsigned _best_err){
+  int mvoffset_base;
+  int best_site;
+  int sitei;
+  mvoffset_base=_vec[0]+_vec[1]*_ystride;
+  best_site=4;
+  for(sitei=0;sitei<8;sitei++){
+    unsigned err;
+    int      site;
+    int      xmask;
+    int      ymask;
+    int      dx;
+    int      dy;
+    int      mvoffset0;
+    int      mvoffset1;
+    site=OC_SQUARE_SITES[0][sitei];
+    dx=OC_SQUARE_DX[site];
+    dy=OC_SQUARE_DY[site];
+    /*The following code SHOULD be equivalent to
+       oc_state_get_mv_offsets(&_enc->state,&mvoffsets,0,
+       (_vec[0]<<1)+dx,(_vec[1]<<1)+dy);
+      However, it should also be much faster, as it involves no multiplies and
+       doesn't have to handle chroma vectors.
+      The vector is doubled with *2 rather than <<1 because left shifting a
+       negative value is undefined behavior in C.*/
+    xmask=OC_SIGNMASK(((_vec[0]*2)+dx)^dx);
+    ymask=OC_SIGNMASK(((_vec[1]*2)+dy)^dy);
+    mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
+    mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
+    err=oc_enc_frag_satd2_thresh(_enc,_src,
+     _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err);
+    if(err<_best_err){
+      _best_err=err;
+      best_site=site;
+    }
+  }
+  _vec[0]=(_vec[0]*2)+OC_SQUARE_DX[best_site];
+  _vec[1]=(_vec[1]*2)+OC_SQUARE_DY[best_site];
+  return _best_err;
+}
+
+/*Refines the four per-block vectors of a macro block against the previous
+   frame.
+  Each stored block_mv is passed through OC_DIV2(), refined by
+   oc_mcenc_ysatd_halfpel_brefine(), and the result is written to ref_mv
+   together with the updated block_satd.
+  _enc: The encoder context.
+  _mbi: The macro block index.*/
+void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi){
+  oc_mb_enc_info      *emb;
+  const ptrdiff_t     *frag_buf_offs;
+  const ptrdiff_t     *fragis;
+  const unsigned char *src;
+  const unsigned char *ref;
+  int                  offset_y[9];
+  int                  ystride;
+  int                  site;
+  int                  bi;
+  ystride=_enc->state.ref_ystride[0];
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  fragis=_enc->state.mb_maps[_mbi][0];
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
+  /*One row of the reference per unit of vertical site offset.
+    The center entry (site 4) is filled in as well, but is never read.*/
+  for(site=0;site<9;site++)offset_y[site]=OC_SQUARE_DY[site]*ystride;
+  emb=_enc->mb_info+_mbi;
+  for(bi=0;bi<4;bi++){
+    ptrdiff_t offs;
+    int       mv[2];
+    offs=frag_buf_offs[fragis[bi]];
+    mv[0]=OC_DIV2(emb->block_mv[bi][0]);
+    mv[1]=OC_DIV2(emb->block_mv[bi][1]);
+    emb->block_satd[bi]=oc_mcenc_ysatd_halfpel_brefine(_enc,mv,
+     src+offs,ref+offs,ystride,offset_y,emb->block_satd[bi]);
+    emb->ref_mv[bi][0]=(signed char)mv[0];
+    emb->ref_mv[bi][1]=(signed char)mv[1];
+  }
+}