diff options
Diffstat (limited to 'thirdparty/libtheora/tokenize.c')
-rw-r--r-- | thirdparty/libtheora/tokenize.c | 1072 |
1 files changed, 1072 insertions, 0 deletions
diff --git a/thirdparty/libtheora/tokenize.c b/thirdparty/libtheora/tokenize.c new file mode 100644 index 0000000000..60574c3594 --- /dev/null +++ b/thirdparty/libtheora/tokenize.c @@ -0,0 +1,1072 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: tokenize.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include <stdlib.h> +#include <string.h> +#include "encint.h" + + + +static int oc_make_eob_token(int _run_count){ + if(_run_count<4)return OC_DCT_EOB1_TOKEN+_run_count-1; + else{ + int cat; + cat=OC_ILOGNZ_32(_run_count)-3; + cat=OC_MINI(cat,3); + return OC_DCT_REPEAT_RUN0_TOKEN+cat; + } +} + +static int oc_make_eob_token_full(int _run_count,int *_eb){ + if(_run_count<4){ + *_eb=0; + return OC_DCT_EOB1_TOKEN+_run_count-1; + } + else{ + int cat; + cat=OC_ILOGNZ_32(_run_count)-3; + cat=OC_MINI(cat,3); + *_eb=_run_count-OC_BYTE_TABLE32(4,8,16,0,cat); + return OC_DCT_REPEAT_RUN0_TOKEN+cat; + } +} + +/*Returns the number of blocks ended by an EOB token.*/ +static int oc_decode_eob_token(int _token,int _eb){ + return (0x20820C41U>>_token*5&0x1F)+_eb; +} + +/*TODO: This is now only used during DCT tokenization, and never for runs; it + should be simplified.*/ +static int oc_make_dct_token_full(int _zzi,int _zzj,int _val,int *_eb){ + int neg; + int zero_run; + int token; + int eb; + neg=_val<0; + _val=abs(_val); + zero_run=_zzj-_zzi; + if(zero_run>0){ + int adj; + /*Implement a minor restriction on stack 1 so that we know during DC fixups + that extending a dctrun token from stack 1 will never overflow.*/ + adj=_zzi!=1; + if(_val<2&&zero_run<17+adj){ + if(zero_run<6){ + token=OC_DCT_RUN_CAT1A+zero_run-1; + eb=neg; + } + else if(zero_run<10){ + token=OC_DCT_RUN_CAT1B; + eb=zero_run-6+(neg<<2); + } + else{ + token=OC_DCT_RUN_CAT1C; + eb=zero_run-10+(neg<<3); + } + } + else if(_val<4&&zero_run<3+adj){ + if(zero_run<2){ + token=OC_DCT_RUN_CAT2A; + eb=_val-2+(neg<<1); + } + else{ + token=OC_DCT_RUN_CAT2B; + eb=zero_run-2+(_val-2<<1)+(neg<<2); + } + } + else{ + if(zero_run<9)token=OC_DCT_SHORT_ZRL_TOKEN; + else token=OC_DCT_ZRL_TOKEN; + eb=zero_run-1; + } + } + else if(_val<3){ + token=OC_ONE_TOKEN+(_val-1<<1)+neg; + eb=0; + } + else if(_val<7){ + token=OC_DCT_VAL_CAT2+_val-3; + eb=neg; + } + else if(_val<9){ + token=OC_DCT_VAL_CAT3; + eb=_val-7+(neg<<1); + } + else if(_val<13){ + token=OC_DCT_VAL_CAT4; + eb=_val-9+(neg<<2); + } + else if(_val<21){ + token=OC_DCT_VAL_CAT5; + eb=_val-13+(neg<<3); + } + else if(_val<37){ + token=OC_DCT_VAL_CAT6; + eb=_val-21+(neg<<4); + } + else if(_val<69){ + token=OC_DCT_VAL_CAT7; + eb=_val-37+(neg<<5); + } + else{ + token=OC_DCT_VAL_CAT8; + eb=_val-69+(neg<<9); + } + *_eb=eb; + return token; +} + +/*Token logging to allow a few fragments of efficient rollback. + Late SKIP analysis is tied up in the tokenization process, so we need to be + able to undo a fragment's tokens on a whim.*/ + +static const unsigned char OC_ZZI_HUFF_OFFSET[64]={ + 0,16,16,16,16,16,32,32, + 32,32,32,32,32,32,32,48, + 48,48,48,48,48,48,48,48, + 48,48,48,48,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64 +}; + +static int oc_token_bits(oc_enc_ctx *_enc,int _huffi,int _zzi,int _token){ + return _enc->huff_codes[_huffi+OC_ZZI_HUFF_OFFSET[_zzi]][_token].nbits + +OC_DCT_TOKEN_EXTRA_BITS[_token]; +} + +static void oc_enc_tokenlog_checkpoint(oc_enc_ctx *_enc, + oc_token_checkpoint *_cp,int _pli,int _zzi){ + _cp->pli=_pli; + _cp->zzi=_zzi; + _cp->eob_run=_enc->eob_run[_pli][_zzi]; + _cp->ndct_tokens=_enc->ndct_tokens[_pli][_zzi]; +} + +void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc, + const oc_token_checkpoint *_stack,int _n){ + int i; + for(i=_n;i-->0;){ + int pli; + int zzi; + pli=_stack[i].pli; + zzi=_stack[i].zzi; + _enc->eob_run[pli][zzi]=_stack[i].eob_run; + _enc->ndct_tokens[pli][zzi]=_stack[i].ndct_tokens; + } +} + +static void oc_enc_token_log(oc_enc_ctx *_enc, + int _pli,int _zzi,int _token,int _eb){ + ptrdiff_t ti; + ti=_enc->ndct_tokens[_pli][_zzi]++; + _enc->dct_tokens[_pli][_zzi][ti]=(unsigned char)_token; + _enc->extra_bits[_pli][_zzi][ti]=(ogg_uint16_t)_eb; +} + +static void oc_enc_eob_log(oc_enc_ctx *_enc, + int _pli,int _zzi,int _run_count){ + int token; + int eb; + token=oc_make_eob_token_full(_run_count,&eb); + oc_enc_token_log(_enc,_pli,_zzi,token,eb); +} + + +void oc_enc_tokenize_start(oc_enc_ctx *_enc){ + memset(_enc->ndct_tokens,0,sizeof(_enc->ndct_tokens)); + memset(_enc->eob_run,0,sizeof(_enc->eob_run)); + memset(_enc->dct_token_offs,0,sizeof(_enc->dct_token_offs)); + memset(_enc->dc_pred_last,0,sizeof(_enc->dc_pred_last)); +} + +typedef struct oc_quant_token oc_quant_token; + +/*A single node in the Viterbi trellis. + We maintain up to 2 of these per coefficient: + - A token to code if the value is zero (EOB, zero run, or combo token). + - A token to code if the value is not zero (DCT value token).*/ +struct oc_quant_token{ + unsigned char next; + signed char token; + ogg_int16_t eb; + ogg_uint32_t cost; + int bits; + int qc; +}; + +/*Tokenizes the AC coefficients, possibly adjusting the quantization, and then + dequantizes and de-zig-zags the result. + The DC coefficient is not preserved; it should be restored by the caller.*/ +int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi, + ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct, + int _zzi,oc_token_checkpoint **_stack,int _acmin){ + oc_token_checkpoint *stack; + ogg_int64_t zflags; + ogg_int64_t nzflags; + ogg_int64_t best_flags; + ogg_uint32_t d2_accum[64]; + oc_quant_token tokens[64][2]; + ogg_uint16_t *eob_run; + const unsigned char *dct_fzig_zag; + ogg_uint32_t cost; + int bits; + int eob; + int token; + int eb; + int next; + int huffi; + int zzi; + int ti; + int zzj; + int qc; + huffi=_enc->huff_idxs[_enc->state.frame_type][1][_pli+1>>1]; + eob_run=_enc->eob_run[_pli]; + memset(tokens[0],0,sizeof(tokens[0])); + best_flags=nzflags=0; + zflags=1; + d2_accum[0]=0; + zzj=64; + for(zzi=OC_MINI(_zzi,63);zzi>0;zzi--){ + ogg_int32_t lambda; + ogg_uint32_t best_cost; + int best_bits=best_bits; + int best_next=best_next; + int best_token=best_token; + int best_eb=best_eb; + int best_qc=best_qc; + int flush_bits; + ogg_uint32_t d2; + int dq; + int e; + int c; + int s; + int tj; + lambda=_enc->lambda; + qc=_qdct[zzi]; + s=-(qc<0); + qc=qc+s^s; + c=_dct[OC_FZIG_ZAG[zzi]]; + if(qc<=1){ + ogg_uint32_t sum_d2; + int nzeros; + int dc_reserve; + /*The hard case: try a zero run.*/ + if(!qc){ + /*Skip runs that are already quantized to zeros. + If we considered each zero coefficient in turn, we might + theoretically find a better way to partition long zero runs (e.g., + a run of > 17 zeros followed by a 1 might be better coded as a short + zero run followed by a combo token, rather than the longer zero + token followed by a 1 value token), but zeros are so common that + this becomes very computationally expensive (quadratic instead of + linear in the number of coefficients), for a marginal gain.*/ + while(zzi>1&&!_qdct[zzi-1])zzi--; + /*The distortion of coefficients originally quantized to zero is + treated as zero (since we'll never quantize them to anything else).*/ + d2=0; + } + else{ + c=c+s^s; + d2=c*(ogg_int32_t)c; + } + eob=eob_run[zzi]; + nzeros=zzj-zzi; + zzj&=63; + sum_d2=d2+d2_accum[zzj]; + d2_accum[zzi]=sum_d2; + flush_bits=eob>0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0; + /*We reserve 1 spot for combo run tokens that start in the 1st AC stack + to ensure they can be extended to include the DC coefficient if + necessary; this greatly simplifies stack-rewriting later on.*/ + dc_reserve=zzi+62>>6; + best_cost=0xFFFFFFFF; + for(;;){ + if(nzflags>>zzj&1){ + int cat; + int val; + int val_s; + int zzk; + int tk; + next=tokens[zzj][1].next; + tk=next&1; + zzk=next>>1; + /*Try a pure zero run to this point.*/ + cat=nzeros+55>>6; + token=OC_DCT_SHORT_ZRL_TOKEN+cat; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + d2=sum_d2-d2_accum[zzj]; + cost=d2+lambda*bits+tokens[zzj][1].cost; + if(cost<=best_cost){ + best_next=(zzj<<1)+1; + best_token=token; + best_eb=nzeros-1; + best_cost=cost; + best_bits=bits+tokens[zzj][1].bits; + best_qc=0; + } + if(nzeros<16+dc_reserve){ + val=_qdct[zzj]; + val_s=-(val<0); + val=val+val_s^val_s; + if(val<=2){ + /*Try a +/- 1 combo token.*/ + if(nzeros<6){ + token=OC_DCT_RUN_CAT1A+nzeros-1; + eb=-val_s; + } + else{ + cat=nzeros+54>>6; + token=OC_DCT_RUN_CAT1B+cat; + eb=(-val_s<<cat+2)+nzeros-6-(cat<<2); + } + e=(_dct[OC_FZIG_ZAG[zzj]]+val_s^val_s)-_dequant[zzj]; + d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj]; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits+tokens[zzk][tk].cost; + if(cost<=best_cost){ + best_next=next; + best_token=token; + best_eb=eb; + best_cost=cost; + best_bits=bits+tokens[zzk][tk].bits; + best_qc=1+val_s^val_s; + } + } + if(nzeros<2+dc_reserve&&2<=val&&val<=4){ + /*Try a +/- 2/3 combo token.*/ + cat=nzeros>>1; + token=OC_DCT_RUN_CAT2A+cat; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + val=2+((val+val_s^val_s)>2); + e=(_dct[OC_FZIG_ZAG[zzj]]+val_s^val_s)-_dequant[zzj]*val; + d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj]; + cost=d2+lambda*bits+tokens[zzk][tk].cost; + if(cost<=best_cost){ + best_cost=cost; + best_bits=bits+tokens[zzk][tk].bits; + best_next=next; + best_token=token; + best_eb=(-val_s<<1+cat)+(val-2<<cat)+(nzeros-1>>1); + best_qc=val+val_s^val_s; + } + } + } + /*zzj can't be coded as a zero, so stop trying to extend the run.*/ + if(!(zflags>>zzj&1))break; + } + /*We could try to consider _all_ potentially non-zero coefficients, but + if we already found a bunch of them not worth coding, it's fairly + unlikely they would now be worth coding from this position; skipping + them saves a lot of work.*/ + zzj=(tokens[zzj][0].next>>1)-(tokens[zzj][0].qc!=0)&63; + if(zzj==0){ + /*We made it all the way to the end of the block; try an EOB token.*/ + if(eob<4095){ + bits=oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob+1)) + -flush_bits; + } + else bits=oc_token_bits(_enc,huffi,zzi,OC_DCT_EOB1_TOKEN); + cost=sum_d2+bits*lambda; + /*If the best route so far is still a pure zero run to the end of the + block, force coding it as an EOB. + Even if it's not optimal for this block, it has a good chance of + getting combined with an EOB token from subsequent blocks, saving + bits overall.*/ + if(cost<=best_cost||best_token<=OC_DCT_ZRL_TOKEN&&zzi+best_eb==63){ + best_next=0; + /*This token is just a marker; in reality we may not emit any + tokens, but update eob_run[] instead.*/ + best_token=OC_DCT_EOB1_TOKEN; + best_eb=0; + best_cost=cost; + best_bits=bits; + best_qc=0; + } + break; + } + nzeros=zzj-zzi; + } + tokens[zzi][0].next=(unsigned char)best_next; + tokens[zzi][0].token=(signed char)best_token; + tokens[zzi][0].eb=(ogg_int16_t)best_eb; + tokens[zzi][0].cost=best_cost; + tokens[zzi][0].bits=best_bits; + tokens[zzi][0].qc=best_qc; + zflags|=(ogg_int64_t)1<<zzi; + if(qc){ + dq=_dequant[zzi]; + if(zzi<_acmin)lambda=0; + e=dq-c; + d2=e*(ogg_int32_t)e; + token=OC_ONE_TOKEN-s; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + zzj=zzi+1&63; + tj=best_flags>>zzj&1; + next=(zzj<<1)+tj; + tokens[zzi][1].next=(unsigned char)next; + tokens[zzi][1].token=(signed char)token; + tokens[zzi][1].eb=0; + tokens[zzi][1].cost=d2+lambda*bits+tokens[zzj][tj].cost; + tokens[zzi][1].bits=bits+tokens[zzj][tj].bits; + tokens[zzi][1].qc=1+s^s; + nzflags|=(ogg_int64_t)1<<zzi; + best_flags|= + (ogg_int64_t)(tokens[zzi][1].cost<tokens[zzi][0].cost)<<zzi; + } + } + else{ + eob=eob_run[zzi]; + if(zzi<_acmin)lambda=0; + c=c+s^s; + dq=_dequant[zzi]; + /*No zero run can extend past this point.*/ + d2_accum[zzi]=0; + flush_bits=eob>0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0; + if(qc<=2){ + e=2*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_TWO_TOKEN-s; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e-=dq; + d2=e*(ogg_int32_t)e; + token=OC_ONE_TOKEN-s; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_bits=bits; + best_cost=cost; + qc--; + } + best_eb=0; + } + else if(qc<=3){ + e=3*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT2; + best_eb=-s; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e-=dq; + d2=e*(ogg_int32_t)e; + token=OC_TWO_TOKEN-s; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=0; + best_bits=bits; + best_cost=cost; + qc--; + } + } + else if(qc<=6){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT2+qc-3; + best_eb=-s; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e-=dq; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_bits=bits; + best_cost=cost; + qc--; + } + } + else if(qc<=8){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT3; + best_eb=(-s<<1)+qc-7; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=6*dq-c; + d2=e*(ogg_int32_t)e; + token=OC_DCT_VAL_CAT2+3; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=-s; + best_bits=bits; + best_cost=cost; + qc=6; + } + } + else if(qc<=12){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT4; + best_eb=(-s<<2)+qc-9; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=8*dq-c; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=(-s<<1)+1; + best_bits=bits; + best_cost=cost; + qc=8; + } + } + else if(qc<=20){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT5; + best_eb=(-s<<3)+qc-13; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=12*dq-c; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=(-s<<2)+3; + best_bits=bits; + best_cost=cost; + qc=12; + } + } + else if(qc<=36){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT6; + best_eb=(-s<<4)+qc-21; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=20*dq-c; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=(-s<<3)+7; + best_bits=bits; + best_cost=cost; + qc=20; + } + } + else if(qc<=68){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT7; + best_eb=(-s<<5)+qc-37; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=36*dq-c; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<best_cost){ + best_token=token; + best_eb=(-s<<4)+15; + best_bits=bits; + best_cost=cost; + qc=36; + } + } + else{ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT8; + best_eb=(-s<<9)+qc-69; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=68*dq-c; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<best_cost){ + best_token=token; + best_eb=(-s<<5)+31; + best_bits=bits; + best_cost=cost; + qc=68; + } + } + zzj=zzi+1&63; + tj=best_flags>>zzj&1; + next=(zzj<<1)+tj; + tokens[zzi][1].next=(unsigned char)next; + tokens[zzi][1].token=(signed char)best_token; + tokens[zzi][1].eb=best_eb; + tokens[zzi][1].cost=best_cost+tokens[zzj][tj].cost; + tokens[zzi][1].bits=best_bits+tokens[zzj][tj].bits; + tokens[zzi][1].qc=qc+s^s; + nzflags|=(ogg_int64_t)1<<zzi; + best_flags|=(ogg_int64_t)1<<zzi; + } + zzj=zzi; + } + /*Emit the tokens from the best path through the trellis.*/ + stack=*_stack; + /*We blow away the first entry here so that things vectorize better. + The DC coefficient is not actually stored in the array yet.*/ + for(zzi=0;zzi<64;zzi++)_qdct[zzi]=0; + dct_fzig_zag=_enc->state.opt_data.dct_fzig_zag; + zzi=1; + ti=best_flags>>1&1; + bits=tokens[zzi][ti].bits; + do{ + oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi); + eob=eob_run[zzi]; + if(tokens[zzi][ti].token<OC_NDCT_EOB_TOKEN_MAX){ + if(++eob>=4095){ + oc_enc_eob_log(_enc,_pli,zzi,eob); + eob=0; + } + eob_run[zzi]=eob; + /*We don't include the actual EOB cost for this block in the return value. + It will be paid for by the fragment that terminates the EOB run.*/ + bits-=tokens[zzi][ti].bits; + zzi=_zzi; + break; + } + /*Emit pending EOB run if any.*/ + if(eob>0){ + oc_enc_eob_log(_enc,_pli,zzi,eob); + eob_run[zzi]=0; + } + oc_enc_token_log(_enc,_pli,zzi,tokens[zzi][ti].token,tokens[zzi][ti].eb); + next=tokens[zzi][ti].next; + qc=tokens[zzi][ti].qc; + zzj=(next>>1)-1&63; + /*TODO: It may be worth saving the dequantized coefficient in the trellis + above; we had to compute it to measure the error anyway.*/ + _qdct[dct_fzig_zag[zzj]]=(ogg_int16_t)(qc*(int)_dequant[zzj]); + zzi=next>>1; + ti=next&1; + } + while(zzi); + *_stack=stack; + return bits; +} + +void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc, + int _pli,int _fragy0,int _frag_yend){ + const oc_fragment_plane *fplane; + const oc_fragment *frags; + ogg_int16_t *frag_dc; + ptrdiff_t fragi; + int *pred_last; + int nhfrags; + int fragx; + int fragy; + fplane=_enc->state.fplanes+_pli; + frags=_enc->state.frags; + frag_dc=_enc->frag_dc; + pred_last=_enc->dc_pred_last[_pli]; + nhfrags=fplane->nhfrags; + fragi=fplane->froffset+_fragy0*nhfrags; + for(fragy=_fragy0;fragy<_frag_yend;fragy++){ + if(fragy==0){ + /*For the first row, all of the cases reduce to just using the previous + predictor for the same reference frame.*/ + for(fragx=0;fragx<nhfrags;fragx++,fragi++){ + if(frags[fragi].coded){ + int ref; + ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode); + frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred_last[ref]); + pred_last[ref]=frags[fragi].dc; + } + } + } + else{ + const oc_fragment *u_frags; + int l_ref; + int ul_ref; + int u_ref; + u_frags=frags-nhfrags; + l_ref=-1; + ul_ref=-1; + u_ref=u_frags[fragi].coded?OC_FRAME_FOR_MODE(u_frags[fragi].mb_mode):-1; + for(fragx=0;fragx<nhfrags;fragx++,fragi++){ + int ur_ref; + if(fragx+1>=nhfrags)ur_ref=-1; + else{ + ur_ref=u_frags[fragi+1].coded? + OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1; + } + if(frags[fragi].coded){ + int pred; + int ref; + ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode); + /*We break out a separate case based on which of our neighbors use + the same reference frames. + This is somewhat faster than trying to make a generic case which + handles all of them, since it reduces lots of poorly predicted + jumps to one switch statement, and also lets a number of the + multiplications be optimized out by strength reduction.*/ + switch((l_ref==ref)|(ul_ref==ref)<<1| + (u_ref==ref)<<2|(ur_ref==ref)<<3){ + default:pred=pred_last[ref];break; + case 1: + case 3:pred=frags[fragi-1].dc;break; + case 2:pred=u_frags[fragi-1].dc;break; + case 4: + case 6: + case 12:pred=u_frags[fragi].dc;break; + case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break; + case 8:pred=u_frags[fragi+1].dc;break; + case 9: + case 11: + case 13:{ + pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128; + }break; + case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break; + case 14:{ + pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc) + +10*u_frags[fragi].dc)/16; + }break; + case 7: + case 15:{ + int p0; + int p1; + int p2; + p0=frags[fragi-1].dc; + p1=u_frags[fragi-1].dc; + p2=u_frags[fragi].dc; + pred=(29*(p0+p2)-26*p1)/32; + if(abs(pred-p2)>128)pred=p2; + else if(abs(pred-p0)>128)pred=p0; + else if(abs(pred-p1)>128)pred=p1; + }break; + } + frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred); + pred_last[ref]=frags[fragi].dc; + l_ref=ref; + } + else l_ref=-1; + ul_ref=u_ref; + u_ref=ur_ref; + } + } + } +} + +void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli, + const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis, + int _prev_ndct_tokens1,int _prev_eob_run1){ + const ogg_int16_t *frag_dc; + ptrdiff_t fragii; + unsigned char *dct_tokens0; + unsigned char *dct_tokens1; + ogg_uint16_t *extra_bits0; + ogg_uint16_t *extra_bits1; + ptrdiff_t ti0; + ptrdiff_t ti1r; + ptrdiff_t ti1w; + int eob_run0; + int eob_run1; + int neobs1; + int token; + int eb; + int token1=token1; + int eb1=eb1; + /*Return immediately if there are no coded fragments; otherwise we'd flush + any trailing EOB run into the AC 1 list and never read it back out.*/ + if(_ncoded_fragis<=0)return; + frag_dc=_enc->frag_dc; + dct_tokens0=_enc->dct_tokens[_pli][0]; + dct_tokens1=_enc->dct_tokens[_pli][1]; + extra_bits0=_enc->extra_bits[_pli][0]; + extra_bits1=_enc->extra_bits[_pli][1]; + ti0=_enc->ndct_tokens[_pli][0]; + ti1w=ti1r=_prev_ndct_tokens1; + eob_run0=_enc->eob_run[_pli][0]; + /*Flush any trailing EOB run for the 1st AC coefficient. + This is needed to allow us to track tokens to the end of the list.*/ + eob_run1=_enc->eob_run[_pli][1]; + if(eob_run1>0)oc_enc_eob_log(_enc,_pli,1,eob_run1); + /*If there was an active EOB run at the start of the 1st AC stack, read it + in and decode it.*/ + if(_prev_eob_run1>0){ + token1=dct_tokens1[ti1r]; + eb1=extra_bits1[ti1r]; + ti1r++; + eob_run1=oc_decode_eob_token(token1,eb1); + /*Consume the portion of the run that came before these fragments.*/ + neobs1=eob_run1-_prev_eob_run1; + } + else eob_run1=neobs1=0; + for(fragii=0;fragii<_ncoded_fragis;fragii++){ + int val; + /*All tokens in the 1st AC coefficient stack are regenerated as the DC + coefficients are produced. + This can be done in-place; stack 1 cannot get larger.*/ + if(!neobs1){ + /*There's no active EOB run in stack 1; read the next token.*/ + token1=dct_tokens1[ti1r]; + eb1=extra_bits1[ti1r]; + ti1r++; + if(token1<OC_NDCT_EOB_TOKEN_MAX){ + neobs1=oc_decode_eob_token(token1,eb1); + /*It's an EOB run; add it to the current (inactive) one. + Because we may have moved entries to stack 0, we may have an + opportunity to merge two EOB runs in stack 1.*/ + eob_run1+=neobs1; + } + } + val=frag_dc[_coded_fragis[fragii]]; + if(val){ + /*There was a non-zero DC value, so there's no alteration to stack 1 + for this fragment; just code the stack 0 token.*/ + /*Flush any pending EOB run.*/ + if(eob_run0>0){ + token=oc_make_eob_token_full(eob_run0,&eb); + dct_tokens0[ti0]=(unsigned char)token; + extra_bits0[ti0]=(ogg_uint16_t)eb; + ti0++; + eob_run0=0; + } + token=oc_make_dct_token_full(0,0,val,&eb); + dct_tokens0[ti0]=(unsigned char)token; + extra_bits0[ti0]=(ogg_uint16_t)eb; + ti0++; + } + else{ + /*Zero DC value; that means the entry in stack 1 might need to be coded + from stack 0. + This requires a stack 1 fixup.*/ + if(neobs1>0){ + /*We're in the middle of an active EOB run in stack 1. + Move it to stack 0.*/ + if(++eob_run0>=4095){ + token=oc_make_eob_token_full(eob_run0,&eb); + dct_tokens0[ti0]=(unsigned char)token; + extra_bits0[ti0]=(ogg_uint16_t)eb; + ti0++; + eob_run0=0; + } + eob_run1--; + } + else{ + /*No active EOB run in stack 1, so we can't extend one in stack 0. + Flush it if we've got it.*/ + if(eob_run0>0){ + token=oc_make_eob_token_full(eob_run0,&eb); + dct_tokens0[ti0]=(unsigned char)token; + extra_bits0[ti0]=(ogg_uint16_t)eb; + ti0++; + eob_run0=0; + } + /*Stack 1 token is one of: a pure zero run token, a single + coefficient token, or a zero run/coefficient combo token. + A zero run token is expanded and moved to token stack 0, and the + stack 1 entry dropped. + A single coefficient value may be transformed into combo token that + is moved to stack 0, or if it cannot be combined, it is left alone + and a single length-1 zero run is emitted in stack 0. + A combo token is extended and moved to stack 0. + During AC coding, we restrict the run lengths on combo tokens for + stack 1 to guarantee we can extend them.*/ + switch(token1){ + case OC_DCT_SHORT_ZRL_TOKEN:{ + if(eb1<7){ + dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + continue; + } + /*Fall through.*/ + } + case OC_DCT_ZRL_TOKEN:{ + dct_tokens0[ti0]=OC_DCT_ZRL_TOKEN; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_ONE_TOKEN: + case OC_MINUS_ONE_TOKEN:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT1A; + extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_ONE_TOKEN); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_TWO_TOKEN: + case OC_MINUS_TWO_TOKEN:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT2A; + extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_TWO_TOKEN<<1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_VAL_CAT2:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT2A; + extra_bits0[ti0]=(ogg_uint16_t)((eb1<<1)+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_RUN_CAT1A: + case OC_DCT_RUN_CAT1A+1: + case OC_DCT_RUN_CAT1A+2: + case OC_DCT_RUN_CAT1A+3:{ + dct_tokens0[ti0]=(unsigned char)(token1+1); + extra_bits0[ti0]=(ogg_uint16_t)eb1; + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_RUN_CAT1A+4:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT1B; + extra_bits0[ti0]=(ogg_uint16_t)(eb1<<2); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_RUN_CAT1B:{ + if((eb1&3)<3){ + dct_tokens0[ti0]=OC_DCT_RUN_CAT1B; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + continue; + } + eb1=((eb1&4)<<1)-1; + /*Fall through.*/ + } + case OC_DCT_RUN_CAT1C:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT1C; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_RUN_CAT2A:{ + eb1=(eb1<<1)-1; + /*Fall through.*/ + } + case OC_DCT_RUN_CAT2B:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT2B; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + } + /*We can't merge tokens, write a short zero run and keep going.*/ + dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN; + extra_bits0[ti0]=0; + ti0++; + } + } + if(!neobs1){ + /*Flush any (inactive) EOB run.*/ + if(eob_run1>0){ + token=oc_make_eob_token_full(eob_run1,&eb); + dct_tokens1[ti1w]=(unsigned char)token; + extra_bits1[ti1w]=(ogg_uint16_t)eb; + ti1w++; + eob_run1=0; + } + /*There's no active EOB run, so log the current token.*/ + dct_tokens1[ti1w]=(unsigned char)token1; + extra_bits1[ti1w]=(ogg_uint16_t)eb1; + ti1w++; + } + else{ + /*Otherwise consume one EOB from the current run.*/ + neobs1--; + /*If we have more than 4095 EOBs outstanding in stack1, flush the run.*/ + if(eob_run1-neobs1>=4095){ + token=oc_make_eob_token_full(4095,&eb); + dct_tokens1[ti1w]=(unsigned char)token; + extra_bits1[ti1w]=(ogg_uint16_t)eb; + ti1w++; + eob_run1-=4095; + } + } + } + /*Save the current state.*/ + _enc->ndct_tokens[_pli][0]=ti0; + _enc->ndct_tokens[_pli][1]=ti1w; + _enc->eob_run[_pli][0]=eob_run0; + _enc->eob_run[_pli][1]=eob_run1; +} + +/*Final EOB run welding.*/ +void oc_enc_tokenize_finish(oc_enc_ctx *_enc){ + int pli; + int zzi; + /*Emit final EOB runs.*/ + for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){ + int eob_run; + eob_run=_enc->eob_run[pli][zzi]; + if(eob_run>0)oc_enc_eob_log(_enc,pli,zzi,eob_run); + } + /*Merge the final EOB run of one token list with the start of the next, if + possible.*/ + for(zzi=0;zzi<64;zzi++)for(pli=0;pli<3;pli++){ + int old_tok1; + int old_tok2; + int old_eb1; + int old_eb2; + int new_tok; + int new_eb; + int zzj; + int plj; + ptrdiff_t ti=ti; + int run_count; + /*Make sure this coefficient has tokens at all.*/ + if(_enc->ndct_tokens[pli][zzi]<=0)continue; + /*Ensure the first token is an EOB run.*/ + old_tok2=_enc->dct_tokens[pli][zzi][0]; + if(old_tok2>=OC_NDCT_EOB_TOKEN_MAX)continue; + /*Search for a previous coefficient that has any tokens at all.*/ + old_tok1=OC_NDCT_EOB_TOKEN_MAX; + for(zzj=zzi,plj=pli;zzj>=0;zzj--){ + while(plj-->0){ + ti=_enc->ndct_tokens[plj][zzj]-1; + if(ti>=_enc->dct_token_offs[plj][zzj]){ + old_tok1=_enc->dct_tokens[plj][zzj][ti]; + break; + } + } + if(plj>=0)break; + plj=3; + } + /*Ensure its last token was an EOB run.*/ + if(old_tok1>=OC_NDCT_EOB_TOKEN_MAX)continue; + /*Pull off the associated extra bits, if any, and decode the runs.*/ + old_eb1=_enc->extra_bits[plj][zzj][ti]; + old_eb2=_enc->extra_bits[pli][zzi][0]; + run_count=oc_decode_eob_token(old_tok1,old_eb1) + +oc_decode_eob_token(old_tok2,old_eb2); + /*We can't possibly combine these into one run. + It might be possible to split them more optimally, but we'll just leave + them as-is.*/ + if(run_count>=4096)continue; + /*We CAN combine them into one run.*/ + new_tok=oc_make_eob_token_full(run_count,&new_eb); + _enc->dct_tokens[plj][zzj][ti]=(unsigned char)new_tok; + _enc->extra_bits[plj][zzj][ti]=(ogg_uint16_t)new_eb; + _enc->dct_token_offs[pli][zzi]++; + } +} |