diff options
Diffstat (limited to 'thirdparty/libtheora/x86_vc/mmxstate.c')
-rw-r--r-- | thirdparty/libtheora/x86_vc/mmxstate.c | 75 |
1 files changed, 20 insertions, 55 deletions
diff --git a/thirdparty/libtheora/x86_vc/mmxstate.c b/thirdparty/libtheora/x86_vc/mmxstate.c index 73bd1981cf..f532ee1b6f 100644 --- a/thirdparty/libtheora/x86_vc/mmxstate.c +++ b/thirdparty/libtheora/x86_vc/mmxstate.c @@ -11,7 +11,7 @@ ******************************************************************** function: - last mod: $Id: mmxstate.c 16584 2009-09-26 19:35:55Z tterribe $ + last mod: $Id$ ********************************************************************/ @@ -19,17 +19,16 @@ Originally written by Rudolf Marek.*/ #include <string.h> #include "x86int.h" -#include "mmxfrag.h" #include "mmxloop.h" #if defined(OC_X86_ASM) void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, - int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ unsigned char *dst; ptrdiff_t frag_buf_off; int ystride; - int mb_mode; + int refi; /*Apply the inverse transform.*/ /*Special case only having a DC component.*/ if(_last_zzi<2){ @@ -45,6 +44,7 @@ void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, #define P ecx mov Y,_dct_coeffs movzx P,p + lea Y,[Y+128] /*mm0=0000 0000 0000 AAAA*/ movd mm0,P /*mm0=0000 0000 AAAA AAAA*/ @@ -74,65 +74,32 @@ void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, else{ /*Dequantize the DC coefficient.*/ _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); - oc_idct8x8_mmx(_dct_coeffs,_last_zzi); + oc_idct8x8_mmx(_dct_coeffs+64,_dct_coeffs,_last_zzi); } /*Fill in the target buffer.*/ frag_buf_off=_state->frag_buf_offs[_fragi]; - mb_mode=_state->frags[_fragi].mb_mode; + refi=_state->frags[_fragi].refi; ystride=_state->ref_ystride[_pli]; - dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; - if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs); + dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; + if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64); else{ const unsigned char *ref; int mvoffsets[2]; - ref= - _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] - +frag_buf_off; + ref=_state->ref_frame_data[refi]+frag_buf_off; if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, - _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ + _state->frag_mvs[_fragi])>1){ oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, - _dct_coeffs); + _dct_coeffs+64); } - else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs); + else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); } } /*We copy these entire function to inline the actual MMX routines so that we use only a single indirect call.*/ -/*Copies the fragments specified by the lists of fragment indices from one - frame to another. - _fragis: A pointer to a list of fragment indices. - _nfragis: The number of fragment indices to copy. - _dst_frame: The reference frame to copy to. - _src_frame: The reference frame to copy from. - _pli: The color plane the fragments lie in.*/ -void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, - const ptrdiff_t *_fragis,ptrdiff_t _nfragis, - int _dst_frame,int _src_frame,int _pli){ - const ptrdiff_t *frag_buf_offs; - const unsigned char *src_frame_data; - unsigned char *dst_frame_data; - ptrdiff_t fragii; - int ystride; - dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; - src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; - ystride=_state->ref_ystride[_pli]; - frag_buf_offs=_state->frag_buf_offs; - for(fragii=0;fragii<_nfragis;fragii++){ - ptrdiff_t frag_buf_off; - frag_buf_off=frag_buf_offs[_fragis[fragii]]; -#define SRC edx -#define DST eax -#define YSTRIDE ecx -#define YSTRIDE3 edi - OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off, - src_frame_data+frag_buf_off,ystride); -#undef SRC -#undef DST -#undef YSTRIDE -#undef YSTRIDE3 - } +void oc_loop_filter_init_mmx(signed char _bv[256],int _flimit){ + memset(_bv,~(_flimit<<1),8); } /*Apply the loop filter to a given set of fragment rows in the given plane. @@ -144,8 +111,7 @@ void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, _fragy0: The Y coordinate of the first fragment row to filter. _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, - int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){ - OC_ALIGN8(unsigned char ll[8]); + signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){ const oc_fragment_plane *fplane; const oc_fragment *frags; const ptrdiff_t *frag_buf_offs; @@ -156,13 +122,12 @@ void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, ptrdiff_t fragi0_end; int ystride; int nhfrags; - memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll)); fplane=_state->fplanes+_pli; nhfrags=fplane->nhfrags; fragi_top=fplane->froffset; fragi_bot=fragi_top+fplane->nfrags; fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; - fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; + fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags; ystride=_state->ref_ystride[_pli]; frags=_state->frags; frag_buf_offs=_state->frag_buf_offs; @@ -187,13 +152,13 @@ void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, #define LL edx #define D esi #define D_WORD si - if(fragi>fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,ll); - if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,ll); + if(fragi>fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,_bv); + if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,_bv); if(fragi+1<fragi_end&&!frags[fragi+1].coded){ - OC_LOOP_FILTER_H_MMX(ref+8,ystride,ll); + OC_LOOP_FILTER_H_MMX(ref+8,ystride,_bv); } if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){ - OC_LOOP_FILTER_V_MMX(ref+(ystride<<3),ystride,ll); + OC_LOOP_FILTER_V_MMX(ref+(ystride<<3),ystride,_bv); } #undef PIX #undef YSTRIDE3 |