1 files changed, 1072 insertions, 0 deletions
diff --git a/thirdparty/libtheora/tokenize.c b/thirdparty/libtheora/tokenize.c
new file mode 100644
index 0000000000..60574c3594
--- /dev/null
+++ b/thirdparty/libtheora/tokenize.c
@@ -0,0 +1,1072 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: tokenize.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+
+
+static int oc_make_eob_token(int _run_count){
+  if(_run_count<4)return OC_DCT_EOB1_TOKEN+_run_count-1;
+  else{
+    int cat;
+    cat=OC_ILOGNZ_32(_run_count)-3;
+    cat=OC_MINI(cat,3);
+    return OC_DCT_REPEAT_RUN0_TOKEN+cat;
+  }
+}
+
+static int oc_make_eob_token_full(int _run_count,int *_eb){
+  if(_run_count<4){
+    *_eb=0;
+    return OC_DCT_EOB1_TOKEN+_run_count-1;
+  }
+  else{
+    int cat;
+    cat=OC_ILOGNZ_32(_run_count)-3;
+    cat=OC_MINI(cat,3);
+    *_eb=_run_count-OC_BYTE_TABLE32(4,8,16,0,cat);
+    return OC_DCT_REPEAT_RUN0_TOKEN+cat;
+  }
+}
+
+/*Returns the number of blocks ended by an EOB token.*/
+static int oc_decode_eob_token(int _token,int _eb){
+  return (0x20820C41U>>_token*5&0x1F)+_eb;
+}
+
+/*TODO: This is now only used during DCT tokenization, and never for runs; it
+   should be simplified.*/
+static int oc_make_dct_token_full(int _zzi,int _zzj,int _val,int *_eb){
+  int neg;
+  int zero_run;
+  int token;
+  int eb;
+  neg=_val<0;
+  _val=abs(_val);
+  zero_run=_zzj-_zzi;
+  if(zero_run>0){
+    int adj;
+    /*Implement a minor restriction on stack 1 so that we know during DC fixups
+       that extending a dctrun token from stack 1 will never overflow.*/
+    adj=_zzi!=1;
+    if(_val<2&&zero_run<17+adj){
+      if(zero_run<6){
+        token=OC_DCT_RUN_CAT1A+zero_run-1;
+        eb=neg;
+      }
+      else if(zero_run<10){
+        token=OC_DCT_RUN_CAT1B;
+        eb=zero_run-6+(neg<<2);
+      }
+      else{
+        token=OC_DCT_RUN_CAT1C;
+        eb=zero_run-10+(neg<<3);
+      }
+    }
+    else if(_val<4&&zero_run<3+adj){
+      if(zero_run<2){
+        token=OC_DCT_RUN_CAT2A;
+        eb=_val-2+(neg<<1);
+      }
+      else{
+        token=OC_DCT_RUN_CAT2B;
+        eb=zero_run-2+(_val-2<<1)+(neg<<2);
+      }
+    }
+    else{
+      if(zero_run<9)token=OC_DCT_SHORT_ZRL_TOKEN;
+      else token=OC_DCT_ZRL_TOKEN;
+      eb=zero_run-1;
+    }
+  }
+  else if(_val<3){
+    token=OC_ONE_TOKEN+(_val-1<<1)+neg;
+    eb=0;
+  }
+  else if(_val<7){
+    token=OC_DCT_VAL_CAT2+_val-3;
+    eb=neg;
+  }
+  else if(_val<9){
+    token=OC_DCT_VAL_CAT3;
+    eb=_val-7+(neg<<1);
+  }
+  else if(_val<13){
+    token=OC_DCT_VAL_CAT4;
+    eb=_val-9+(neg<<2);
+  }
+  else if(_val<21){
+    token=OC_DCT_VAL_CAT5;
+    eb=_val-13+(neg<<3);
+  }
+  else if(_val<37){
+    token=OC_DCT_VAL_CAT6;
+    eb=_val-21+(neg<<4);
+  }
+  else if(_val<69){
+    token=OC_DCT_VAL_CAT7;
+    eb=_val-37+(neg<<5);
+  }
+  else{
+    token=OC_DCT_VAL_CAT8;
+    eb=_val-69+(neg<<9);
+  }
+  *_eb=eb;
+  return token;
+}
+
+/*Token logging to allow a few fragments of efficient rollback.
+  Late SKIP analysis is tied up in the tokenization process, so we need to be
+   able to undo a fragment's tokens on a whim.*/
+
+static const unsigned char OC_ZZI_HUFF_OFFSET[64]={
+   0,16,16,16,16,16,32,32,
+  32,32,32,32,32,32,32,48,
+  48,48,48,48,48,48,48,48,
+  48,48,48,48,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64
+};
+
+static int oc_token_bits(oc_enc_ctx *_enc,int _huffi,int _zzi,int _token){
+  return _enc->huff_codes[_huffi+OC_ZZI_HUFF_OFFSET[_zzi]][_token].nbits
+   +OC_DCT_TOKEN_EXTRA_BITS[_token];
+}
+
+static void oc_enc_tokenlog_checkpoint(oc_enc_ctx *_enc,
+ oc_token_checkpoint *_cp,int _pli,int _zzi){
+  _cp->pli=_pli;
+  _cp->zzi=_zzi;
+  _cp->eob_run=_enc->eob_run[_pli][_zzi];
+  _cp->ndct_tokens=_enc->ndct_tokens[_pli][_zzi];
+}
+
+void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc,
+ const oc_token_checkpoint *_stack,int _n){
+  int i;
+  for(i=_n;i-->0;){
+    int pli;
+    int zzi;
+    pli=_stack[i].pli;
+    zzi=_stack[i].zzi;
+    _enc->eob_run[pli][zzi]=_stack[i].eob_run;
+    _enc->ndct_tokens[pli][zzi]=_stack[i].ndct_tokens;
+  }
+}
+
+static void oc_enc_token_log(oc_enc_ctx *_enc,
+ int _pli,int _zzi,int _token,int _eb){
+  ptrdiff_t ti;
+  ti=_enc->ndct_tokens[_pli][_zzi]++;
+  _enc->dct_tokens[_pli][_zzi][ti]=(unsigned char)_token;
+  _enc->extra_bits[_pli][_zzi][ti]=(ogg_uint16_t)_eb;
+}
+
+static void oc_enc_eob_log(oc_enc_ctx *_enc,
+ int _pli,int _zzi,int _run_count){
+  int token;
+  int eb;
+  token=oc_make_eob_token_full(_run_count,&eb);
+  oc_enc_token_log(_enc,_pli,_zzi,token,eb);
+}
+
+
+void oc_enc_tokenize_start(oc_enc_ctx *_enc){
+  memset(_enc->ndct_tokens,0,sizeof(_enc->ndct_tokens));
+  memset(_enc->eob_run,0,sizeof(_enc->eob_run));
+  memset(_enc->dct_token_offs,0,sizeof(_enc->dct_token_offs));
+  memset(_enc->dc_pred_last,0,sizeof(_enc->dc_pred_last));
+}
+
+typedef struct oc_quant_token oc_quant_token;
+
+/*A single node in the Viterbi trellis.
+  We maintain up to 2 of these per coefficient:
+    - A token to code if the value is zero (EOB, zero run, or combo token).
+    - A token to code if the value is not zero (DCT value token).*/
+struct oc_quant_token{
+  unsigned char next;
+  signed char   token;
+  ogg_int16_t   eb;
+  ogg_uint32_t  cost;
+  int           bits;
+  int           qc;
+};
+
+/*Tokenizes the AC coefficients, possibly adjusting the quantization, and then
+   dequantizes and de-zig-zags the result.
+  The DC coefficient is not preserved; it should be restored by the caller.*/
+int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
+ ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _acmin){
+  oc_token_checkpoint *stack;
+  ogg_int64_t          zflags;
+  ogg_int64_t          nzflags;
+  ogg_int64_t          best_flags;
+  ogg_uint32_t         d2_accum[64];
+  oc_quant_token       tokens[64][2];
+  ogg_uint16_t        *eob_run;
+  const unsigned char *dct_fzig_zag;
+  ogg_uint32_t         cost;
+  int                  bits;
+  int                  eob;
+  int                  token;
+  int                  eb;
+  int                  next;
+  int                  huffi;
+  int                  zzi;
+  int                  ti;
+  int                  zzj;
+  int                  qc;
+  huffi=_enc->huff_idxs[_enc->state.frame_type][1][_pli+1>>1];
+  eob_run=_enc->eob_run[_pli];
+  memset(tokens[0],0,sizeof(tokens[0]));
+  best_flags=nzflags=0;
+  zflags=1;
+  d2_accum[0]=0;
+  zzj=64;
+  for(zzi=OC_MINI(_zzi,63);zzi>0;zzi--){
+    ogg_int32_t  lambda;
+    ogg_uint32_t best_cost;
+    int          best_bits=best_bits;
+    int          best_next=best_next;
+    int          best_token=best_token;
+    int          best_eb=best_eb;
+    int          best_qc=best_qc;
+    int          flush_bits;
+    ogg_uint32_t d2;
+    int          dq;
+    int          e;
+    int          c;
+    int          s;
+    int          tj;
+    lambda=_enc->lambda;
+    qc=_qdct[zzi];
+    s=-(qc<0);
+    qc=qc+s^s;
+    c=_dct[OC_FZIG_ZAG[zzi]];
+    if(qc<=1){
+      ogg_uint32_t sum_d2;
+      int          nzeros;
+      int          dc_reserve;
+      /*The hard case: try a zero run.*/
+      if(!qc){
+        /*Skip runs that are already quantized to zeros.
+          If we considered each zero coefficient in turn, we might
+           theoretically find a better way to partition long zero runs (e.g.,
+           a run of > 17 zeros followed by a 1 might be better coded as a short
+           zero run followed by a combo token, rather than the longer zero
+           token followed by a 1 value token), but zeros are so common that
+           this becomes very computationally expensive (quadratic instead of
+           linear in the number of coefficients), for a marginal gain.*/
+        while(zzi>1&&!_qdct[zzi-1])zzi--;
+        /*The distortion of coefficients originally quantized to zero is
+           treated as zero (since we'll never quantize them to anything else).*/
+        d2=0;
+      }
+      else{
+        c=c+s^s;
+        d2=c*(ogg_int32_t)c;
+      }
+      eob=eob_run[zzi];
+      nzeros=zzj-zzi;
+      zzj&=63;
+      sum_d2=d2+d2_accum[zzj];
+      d2_accum[zzi]=sum_d2;
+      flush_bits=eob>0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0;
+      /*We reserve 1 spot for combo run tokens that start in the 1st AC stack
+         to ensure they can be extended to include the DC coefficient if
+         necessary; this greatly simplifies stack-rewriting later on.*/
+      dc_reserve=zzi+62>>6;
+      best_cost=0xFFFFFFFF;
+      for(;;){
+        if(nzflags>>zzj&1){
+          int cat;
+          int val;
+          int val_s;
+          int zzk;
+          int tk;
+          next=tokens[zzj][1].next;
+          tk=next&1;
+          zzk=next>>1;
+          /*Try a pure zero run to this point.*/
+          cat=nzeros+55>>6;
+          token=OC_DCT_SHORT_ZRL_TOKEN+cat;
+          bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+          d2=sum_d2-d2_accum[zzj];
+          cost=d2+lambda*bits+tokens[zzj][1].cost;
+          if(cost<=best_cost){
+            best_next=(zzj<<1)+1;
+            best_token=token;
+            best_eb=nzeros-1;
+            best_cost=cost;
+            best_bits=bits+tokens[zzj][1].bits;
+            best_qc=0;
+          }
+          if(nzeros<16+dc_reserve){
+            val=_qdct[zzj];
+            val_s=-(val<0);
+            val=val+val_s^val_s;
+            if(val<=2){
+              /*Try a +/- 1 combo token.*/
+              if(nzeros<6){
+                token=OC_DCT_RUN_CAT1A+nzeros-1;
+                eb=-val_s;
+              }
+              else{
+                cat=nzeros+54>>6;
+                token=OC_DCT_RUN_CAT1B+cat;
+                eb=(-val_s<<cat+2)+nzeros-6-(cat<<2);
+              }
+              e=(_dct[OC_FZIG_ZAG[zzj]]+val_s^val_s)-_dequant[zzj];
+              d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj];
+              bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+              cost=d2+lambda*bits+tokens[zzk][tk].cost;
+              if(cost<=best_cost){
+                best_next=next;
+                best_token=token;
+                best_eb=eb;
+                best_cost=cost;
+                best_bits=bits+tokens[zzk][tk].bits;
+                best_qc=1+val_s^val_s;
+              }
+            }
+            if(nzeros<2+dc_reserve&&2<=val&&val<=4){
+              /*Try a +/- 2/3 combo token.*/
+              cat=nzeros>>1;
+              token=OC_DCT_RUN_CAT2A+cat;
+              bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+              val=2+((val+val_s^val_s)>2);
+              e=(_dct[OC_FZIG_ZAG[zzj]]+val_s^val_s)-_dequant[zzj]*val;
+              d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj];
+              cost=d2+lambda*bits+tokens[zzk][tk].cost;
+              if(cost<=best_cost){
+                best_cost=cost;
+                best_bits=bits+tokens[zzk][tk].bits;
+                best_next=next;
+                best_token=token;
+                best_eb=(-val_s<<1+cat)+(val-2<<cat)+(nzeros-1>>1);
+                best_qc=val+val_s^val_s;
+              }
+            }
+          }
+          /*zzj can't be coded as a zero, so stop trying to extend the run.*/
+          if(!(zflags>>zzj&1))break;
+        }
+        /*We could try to consider _all_ potentially non-zero coefficients, but
+           if we already found a bunch of them not worth coding, it's fairly
+           unlikely they would now be worth coding from this position; skipping
+           them saves a lot of work.*/
+        zzj=(tokens[zzj][0].next>>1)-(tokens[zzj][0].qc!=0)&63;
+        if(zzj==0){
+          /*We made it all the way to the end of the block; try an EOB token.*/
+          if(eob<4095){
+            bits=oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob+1))
+             -flush_bits;
+          }
+          else bits=oc_token_bits(_enc,huffi,zzi,OC_DCT_EOB1_TOKEN);
+          cost=sum_d2+bits*lambda;
+          /*If the best route so far is still a pure zero run to the end of the
+             block, force coding it as an EOB.
+            Even if it's not optimal for this block, it has a good chance of
+             getting combined with an EOB token from subsequent blocks, saving
+             bits overall.*/
+          if(cost<=best_cost||best_token<=OC_DCT_ZRL_TOKEN&&zzi+best_eb==63){
+            best_next=0;
+            /*This token is just a marker; in reality we may not emit any
+               tokens, but update eob_run[] instead.*/
+            best_token=OC_DCT_EOB1_TOKEN;
+            best_eb=0;
+            best_cost=cost;
+            best_bits=bits;
+            best_qc=0;
+          }
+          break;
+        }
+        nzeros=zzj-zzi;
+      }
+      tokens[zzi][0].next=(unsigned char)best_next;
+      tokens[zzi][0].token=(signed char)best_token;
+      tokens[zzi][0].eb=(ogg_int16_t)best_eb;
+      tokens[zzi][0].cost=best_cost;
+      tokens[zzi][0].bits=best_bits;
+      tokens[zzi][0].qc=best_qc;
+      zflags|=(ogg_int64_t)1<<zzi;
+      if(qc){
+        dq=_dequant[zzi];
+        if(zzi<_acmin)lambda=0;
+        e=dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=OC_ONE_TOKEN-s;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        zzj=zzi+1&63;
+        tj=best_flags>>zzj&1;
+        next=(zzj<<1)+tj;
+        tokens[zzi][1].next=(unsigned char)next;
+        tokens[zzi][1].token=(signed char)token;
+        tokens[zzi][1].eb=0;
+        tokens[zzi][1].cost=d2+lambda*bits+tokens[zzj][tj].cost;
+        tokens[zzi][1].bits=bits+tokens[zzj][tj].bits;
+        tokens[zzi][1].qc=1+s^s;
+        nzflags|=(ogg_int64_t)1<<zzi;
+        best_flags|=
+         (ogg_int64_t)(tokens[zzi][1].cost<tokens[zzi][0].cost)<<zzi;
+      }
+    }
+    else{
+      eob=eob_run[zzi];
+      if(zzi<_acmin)lambda=0;
+      c=c+s^s;
+      dq=_dequant[zzi];
+      /*No zero run can extend past this point.*/
+      d2_accum[zzi]=0;
+      flush_bits=eob>0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0;
+      if(qc<=2){
+        e=2*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_TWO_TOKEN-s;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e-=dq;
+        d2=e*(ogg_int32_t)e;
+        token=OC_ONE_TOKEN-s;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_bits=bits;
+          best_cost=cost;
+          qc--;
+        }
+        best_eb=0;
+      }
+      else if(qc<=3){
+        e=3*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT2;
+        best_eb=-s;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e-=dq;
+        d2=e*(ogg_int32_t)e;
+        token=OC_TWO_TOKEN-s;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_eb=0;
+          best_bits=bits;
+          best_cost=cost;
+          qc--;
+        }
+      }
+      else if(qc<=6){
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT2+qc-3;
+        best_eb=-s;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e-=dq;
+        d2=e*(ogg_int32_t)e;
+        token=best_token-1;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_bits=bits;
+          best_cost=cost;
+          qc--;
+        }
+      }
+      else if(qc<=8){
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT3;
+        best_eb=(-s<<1)+qc-7;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e=6*dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=OC_DCT_VAL_CAT2+3;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_eb=-s;
+          best_bits=bits;
+          best_cost=cost;
+          qc=6;
+        }
+      }
+      else if(qc<=12){
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT4;
+        best_eb=(-s<<2)+qc-9;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e=8*dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=best_token-1;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_eb=(-s<<1)+1;
+          best_bits=bits;
+          best_cost=cost;
+          qc=8;
+        }
+      }
+      else if(qc<=20){
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT5;
+        best_eb=(-s<<3)+qc-13;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e=12*dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=best_token-1;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_eb=(-s<<2)+3;
+          best_bits=bits;
+          best_cost=cost;
+          qc=12;
+        }
+      }
+      else if(qc<=36){
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT6;
+        best_eb=(-s<<4)+qc-21;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e=20*dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=best_token-1;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<=best_cost){
+          best_token=token;
+          best_eb=(-s<<3)+7;
+          best_bits=bits;
+          best_cost=cost;
+          qc=20;
+        }
+      }
+      else if(qc<=68){
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT7;
+        best_eb=(-s<<5)+qc-37;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e=36*dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=best_token-1;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<best_cost){
+          best_token=token;
+          best_eb=(-s<<4)+15;
+          best_bits=bits;
+          best_cost=cost;
+          qc=36;
+        }
+      }
+      else{
+        e=qc*dq-c;
+        d2=e*(ogg_int32_t)e;
+        best_token=OC_DCT_VAL_CAT8;
+        best_eb=(-s<<9)+qc-69;
+        best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token);
+        best_cost=d2+lambda*best_bits;
+        e=68*dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=best_token-1;
+        bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token);
+        cost=d2+lambda*bits;
+        if(cost<best_cost){
+          best_token=token;
+          best_eb=(-s<<5)+31;
+          best_bits=bits;
+          best_cost=cost;
+          qc=68;
+        }
+      }
+      zzj=zzi+1&63;
+      tj=best_flags>>zzj&1;
+      next=(zzj<<1)+tj;
+      tokens[zzi][1].next=(unsigned char)next;
+      tokens[zzi][1].token=(signed char)best_token;
+      tokens[zzi][1].eb=best_eb;
+      tokens[zzi][1].cost=best_cost+tokens[zzj][tj].cost;
+      tokens[zzi][1].bits=best_bits+tokens[zzj][tj].bits;
+      tokens[zzi][1].qc=qc+s^s;
+      nzflags|=(ogg_int64_t)1<<zzi;
+      best_flags|=(ogg_int64_t)1<<zzi;
+    }
+    zzj=zzi;
+  }
+  /*Emit the tokens from the best path through the trellis.*/
+  stack=*_stack;
+  /*We blow away the first entry here so that things vectorize better.
+    The DC coefficient is not actually stored in the array yet.*/
+  for(zzi=0;zzi<64;zzi++)_qdct[zzi]=0;
+  dct_fzig_zag=_enc->state.opt_data.dct_fzig_zag;
+  zzi=1;
+  ti=best_flags>>1&1;
+  bits=tokens[zzi][ti].bits;
+  do{
+    oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi);
+    eob=eob_run[zzi];
+    if(tokens[zzi][ti].token<OC_NDCT_EOB_TOKEN_MAX){
+      if(++eob>=4095){
+        oc_enc_eob_log(_enc,_pli,zzi,eob);
+        eob=0;
+      }
+      eob_run[zzi]=eob;
+      /*We don't include the actual EOB cost for this block in the return value.
+        It will be paid for by the fragment that terminates the EOB run.*/
+      bits-=tokens[zzi][ti].bits;
+      zzi=_zzi;
+      break;
+    }
+    /*Emit pending EOB run if any.*/
+    if(eob>0){
+      oc_enc_eob_log(_enc,_pli,zzi,eob);
+      eob_run[zzi]=0;
+    }
+    oc_enc_token_log(_enc,_pli,zzi,tokens[zzi][ti].token,tokens[zzi][ti].eb);
+    next=tokens[zzi][ti].next;
+    qc=tokens[zzi][ti].qc;
+    zzj=(next>>1)-1&63;
+    /*TODO: It may be worth saving the dequantized coefficient in the trellis
+       above; we had to compute it to measure the error anyway.*/
+    _qdct[dct_fzig_zag[zzj]]=(ogg_int16_t)(qc*(int)_dequant[zzj]);
+    zzi=next>>1;
+    ti=next&1;
+  }
+  while(zzi);
+  *_stack=stack;
+  return bits;
+}
+
+void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc,
+ int _pli,int _fragy0,int _frag_yend){
+  const oc_fragment_plane *fplane;
+  const oc_fragment       *frags;
+  ogg_int16_t             *frag_dc;
+  ptrdiff_t                fragi;
+  int                     *pred_last;
+  int                      nhfrags;
+  int                      fragx;
+  int                      fragy;
+  fplane=_enc->state.fplanes+_pli;
+  frags=_enc->state.frags;
+  frag_dc=_enc->frag_dc;
+  pred_last=_enc->dc_pred_last[_pli];
+  nhfrags=fplane->nhfrags;
+  fragi=fplane->froffset+_fragy0*nhfrags;
+  for(fragy=_fragy0;fragy<_frag_yend;fragy++){
+    if(fragy==0){
+      /*For the first row, all of the cases reduce to just using the previous
+         predictor for the same reference frame.*/
+      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
+        if(frags[fragi].coded){
+          int ref;
+          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+          frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred_last[ref]);
+          pred_last[ref]=frags[fragi].dc;
+        }
+      }
+    }
+    else{
+      const oc_fragment *u_frags;
+      int                l_ref;
+      int                ul_ref;
+      int                u_ref;
+      u_frags=frags-nhfrags;
+      l_ref=-1;
+      ul_ref=-1;
+      u_ref=u_frags[fragi].coded?OC_FRAME_FOR_MODE(u_frags[fragi].mb_mode):-1;
+      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
+        int ur_ref;
+        if(fragx+1>=nhfrags)ur_ref=-1;
+        else{
+          ur_ref=u_frags[fragi+1].coded?
+           OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1;
+        }
+        if(frags[fragi].coded){
+          int pred;
+          int ref;
+          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+          /*We break out a separate case based on which of our neighbors use
+             the same reference frames.
+            This is somewhat faster than trying to make a generic case which
+             handles all of them, since it reduces lots of poorly predicted
+             jumps to one switch statement, and also lets a number of the
+             multiplications be optimized out by strength reduction.*/
+          switch((l_ref==ref)|(ul_ref==ref)<<1|
+           (u_ref==ref)<<2|(ur_ref==ref)<<3){
+            default:pred=pred_last[ref];break;
+            case  1:
+            case  3:pred=frags[fragi-1].dc;break;
+            case  2:pred=u_frags[fragi-1].dc;break;
+            case  4:
+            case  6:
+            case 12:pred=u_frags[fragi].dc;break;
+            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
+            case  8:pred=u_frags[fragi+1].dc;break;
+            case  9:
+            case 11:
+            case 13:{
+              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
+            }break;
+            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
+            case 14:{
+              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
+               +10*u_frags[fragi].dc)/16;
+            }break;
+            case  7:
+            case 15:{
+              int p0;
+              int p1;
+              int p2;
+              p0=frags[fragi-1].dc;
+              p1=u_frags[fragi-1].dc;
+              p2=u_frags[fragi].dc;
+              pred=(29*(p0+p2)-26*p1)/32;
+              if(abs(pred-p2)>128)pred=p2;
+              else if(abs(pred-p0)>128)pred=p0;
+              else if(abs(pred-p1)>128)pred=p1;
+            }break;
+          }
+          frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred);
+          pred_last[ref]=frags[fragi].dc;
+          l_ref=ref;
+        }
+        else l_ref=-1;
+        ul_ref=u_ref;
+        u_ref=ur_ref;
+      }
+    }
+  }
+}
+
+void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli,
+ const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis,
+ int _prev_ndct_tokens1,int _prev_eob_run1){
+  const ogg_int16_t *frag_dc;
+  ptrdiff_t          fragii;
+  unsigned char     *dct_tokens0;
+  unsigned char     *dct_tokens1;
+  ogg_uint16_t      *extra_bits0;
+  ogg_uint16_t      *extra_bits1;
+  ptrdiff_t          ti0;
+  ptrdiff_t          ti1r;
+  ptrdiff_t          ti1w;
+  int                eob_run0;
+  int                eob_run1;
+  int                neobs1;
+  int                token;
+  int                eb;
+  int                token1=token1;
+  int                eb1=eb1;
+  /*Return immediately if there are no coded fragments; otherwise we'd flush
+     any trailing EOB run into the AC 1 list and never read it back out.*/
+  if(_ncoded_fragis<=0)return;
+  frag_dc=_enc->frag_dc;
+  dct_tokens0=_enc->dct_tokens[_pli][0];
+  dct_tokens1=_enc->dct_tokens[_pli][1];
+  extra_bits0=_enc->extra_bits[_pli][0];
+  extra_bits1=_enc->extra_bits[_pli][1];
+  ti0=_enc->ndct_tokens[_pli][0];
+  ti1w=ti1r=_prev_ndct_tokens1;
+  eob_run0=_enc->eob_run[_pli][0];
+  /*Flush any trailing EOB run for the 1st AC coefficient.
+    This is needed to allow us to track tokens to the end of the list.*/
+  eob_run1=_enc->eob_run[_pli][1];
+  if(eob_run1>0)oc_enc_eob_log(_enc,_pli,1,eob_run1);
+  /*If there was an active EOB run at the start of the 1st AC stack, read it
+     in and decode it.*/
+  if(_prev_eob_run1>0){
+    token1=dct_tokens1[ti1r];
+    eb1=extra_bits1[ti1r];
+    ti1r++;
+    eob_run1=oc_decode_eob_token(token1,eb1);
+    /*Consume the portion of the run that came before these fragments.*/
+    neobs1=eob_run1-_prev_eob_run1;
+  }
+  else eob_run1=neobs1=0;
+  for(fragii=0;fragii<_ncoded_fragis;fragii++){
+    int val;
+    /*All tokens in the 1st AC coefficient stack are regenerated as the DC
+       coefficients are produced.
+      This can be done in-place; stack 1 cannot get larger.*/
+    if(!neobs1){
+      /*There's no active EOB run in stack 1; read the next token.*/
+      token1=dct_tokens1[ti1r];
+      eb1=extra_bits1[ti1r];
+      ti1r++;
+      if(token1<OC_NDCT_EOB_TOKEN_MAX){
+        neobs1=oc_decode_eob_token(token1,eb1);
+        /*It's an EOB run; add it to the current (inactive) one.
+          Because we may have moved entries to stack 0, we may have an
+           opportunity to merge two EOB runs in stack 1.*/
+        eob_run1+=neobs1;
+      }
+    }
+    val=frag_dc[_coded_fragis[fragii]];
+    if(val){
+      /*There was a non-zero DC value, so there's no alteration to stack 1
+         for this fragment; just code the stack 0 token.*/
+      /*Flush any pending EOB run.*/
+      if(eob_run0>0){
+        token=oc_make_eob_token_full(eob_run0,&eb);
+        dct_tokens0[ti0]=(unsigned char)token;
+        extra_bits0[ti0]=(ogg_uint16_t)eb;
+        ti0++;
+        eob_run0=0;
+      }
+      token=oc_make_dct_token_full(0,0,val,&eb);
+      dct_tokens0[ti0]=(unsigned char)token;
+      extra_bits0[ti0]=(ogg_uint16_t)eb;
+      ti0++;
+    }
+    else{
+      /*Zero DC value; that means the entry in stack 1 might need to be coded
+         from stack 0.
+        This requires a stack 1 fixup.*/
+      if(neobs1>0){
+        /*We're in the middle of an active EOB run in stack 1.
+          Move it to stack 0.*/
+        if(++eob_run0>=4095){
+          token=oc_make_eob_token_full(eob_run0,&eb);
+          dct_tokens0[ti0]=(unsigned char)token;
+          extra_bits0[ti0]=(ogg_uint16_t)eb;
+          ti0++;
+          eob_run0=0;
+        }
+        eob_run1--;
+      }
+      else{
+        /*No active EOB run in stack 1, so we can't extend one in stack 0.
+          Flush it if we've got it.*/
+        if(eob_run0>0){
+          token=oc_make_eob_token_full(eob_run0,&eb);
+          dct_tokens0[ti0]=(unsigned char)token;
+          extra_bits0[ti0]=(ogg_uint16_t)eb;
+          ti0++;
+          eob_run0=0;
+        }
+        /*Stack 1 token is one of: a pure zero run token, a single
+           coefficient token, or a zero run/coefficient combo token.
+          A zero run token is expanded and moved to token stack 0, and the
+           stack 1 entry dropped.
+          A single coefficient value may be transformed into combo token that
+           is moved to stack 0, or if it cannot be combined, it is left alone
+           and a single length-1 zero run is emitted in stack 0.
+          A combo token is extended and moved to stack 0.
+          During AC coding, we restrict the run lengths on combo tokens for
+           stack 1 to guarantee we can extend them.*/
+        switch(token1){
+          case OC_DCT_SHORT_ZRL_TOKEN:{
+            if(eb1<7){
+              dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN;
+              extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+              ti0++;
+              /*Don't write the AC coefficient back out.*/
+              continue;
+            }
+            /*Fall through.*/
+          }
+          case OC_DCT_ZRL_TOKEN:{
+            dct_tokens0[ti0]=OC_DCT_ZRL_TOKEN;
+            extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_ONE_TOKEN:
+          case OC_MINUS_ONE_TOKEN:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT1A;
+            extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_ONE_TOKEN);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_TWO_TOKEN:
+          case OC_MINUS_TWO_TOKEN:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT2A;
+            extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_TWO_TOKEN<<1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_VAL_CAT2:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT2A;
+            extra_bits0[ti0]=(ogg_uint16_t)((eb1<<1)+1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_RUN_CAT1A:
+          case OC_DCT_RUN_CAT1A+1:
+          case OC_DCT_RUN_CAT1A+2:
+          case OC_DCT_RUN_CAT1A+3:{
+            dct_tokens0[ti0]=(unsigned char)(token1+1);
+            extra_bits0[ti0]=(ogg_uint16_t)eb1;
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_RUN_CAT1A+4:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT1B;
+            extra_bits0[ti0]=(ogg_uint16_t)(eb1<<2);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_RUN_CAT1B:{
+            if((eb1&3)<3){
+              dct_tokens0[ti0]=OC_DCT_RUN_CAT1B;
+              extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+              ti0++;
+              /*Don't write the AC coefficient back out.*/
+              continue;
+            }
+            eb1=((eb1&4)<<1)-1;
+            /*Fall through.*/
+          }
+          case OC_DCT_RUN_CAT1C:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT1C;
+            extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_RUN_CAT2A:{
+            eb1=(eb1<<1)-1;
+            /*Fall through.*/
+          }
+          case OC_DCT_RUN_CAT2B:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT2B;
+            extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+        }
+        /*We can't merge tokens, write a short zero run and keep going.*/
+        dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN;
+        extra_bits0[ti0]=0;
+        ti0++;
+      }
+    }
+    if(!neobs1){
+      /*Flush any (inactive) EOB run.*/
+      if(eob_run1>0){
+        token=oc_make_eob_token_full(eob_run1,&eb);
+        dct_tokens1[ti1w]=(unsigned char)token;
+        extra_bits1[ti1w]=(ogg_uint16_t)eb;
+        ti1w++;
+        eob_run1=0;
+      }
+      /*There's no active EOB run, so log the current token.*/
+      dct_tokens1[ti1w]=(unsigned char)token1;
+      extra_bits1[ti1w]=(ogg_uint16_t)eb1;
+      ti1w++;
+    }
+    else{
+      /*Otherwise consume one EOB from the current run.*/
+      neobs1--;
+      /*If we have more than 4095 EOBs outstanding in stack1, flush the run.*/
+      if(eob_run1-neobs1>=4095){
+        token=oc_make_eob_token_full(4095,&eb);
+        dct_tokens1[ti1w]=(unsigned char)token;
+        extra_bits1[ti1w]=(ogg_uint16_t)eb;
+        ti1w++;
+        eob_run1-=4095;
+      }
+    }
+  }
+  /*Save the current state.*/
+  _enc->ndct_tokens[_pli][0]=ti0;
+  _enc->ndct_tokens[_pli][1]=ti1w;
+  _enc->eob_run[_pli][0]=eob_run0;
+  _enc->eob_run[_pli][1]=eob_run1;
+}
+
+/*Final EOB run welding.*/
+void oc_enc_tokenize_finish(oc_enc_ctx *_enc){
+  int pli;
+  int zzi;
+  /*Emit final EOB runs.*/
+  for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
+    int eob_run;
+    eob_run=_enc->eob_run[pli][zzi];
+    if(eob_run>0)oc_enc_eob_log(_enc,pli,zzi,eob_run);
+  }
+  /*Merge the final EOB run of one token list with the start of the next, if
+     possible.*/
+  for(zzi=0;zzi<64;zzi++)for(pli=0;pli<3;pli++){
+    int       old_tok1;
+    int       old_tok2;
+    int       old_eb1;
+    int       old_eb2;
+    int       new_tok;
+    int       new_eb;
+    int       zzj;
+    int       plj;
+    ptrdiff_t ti=ti;
+    int       run_count;
+    /*Make sure this coefficient has tokens at all.*/
+    if(_enc->ndct_tokens[pli][zzi]<=0)continue;
+    /*Ensure the first token is an EOB run.*/
+    old_tok2=_enc->dct_tokens[pli][zzi][0];
+    if(old_tok2>=OC_NDCT_EOB_TOKEN_MAX)continue;
+    /*Search for a previous coefficient that has any tokens at all.*/
+    old_tok1=OC_NDCT_EOB_TOKEN_MAX;
+    for(zzj=zzi,plj=pli;zzj>=0;zzj--){
+      while(plj-->0){
+        ti=_enc->ndct_tokens[plj][zzj]-1;
+        if(ti>=_enc->dct_token_offs[plj][zzj]){
+          old_tok1=_enc->dct_tokens[plj][zzj][ti];
+          break;
+        }
+      }
+      if(plj>=0)break;
+      plj=3;
+    }
+    /*Ensure its last token was an EOB run.*/
+    if(old_tok1>=OC_NDCT_EOB_TOKEN_MAX)continue;
+    /*Pull off the associated extra bits, if any, and decode the runs.*/
+    old_eb1=_enc->extra_bits[plj][zzj][ti];
+    old_eb2=_enc->extra_bits[pli][zzi][0];
+    run_count=oc_decode_eob_token(old_tok1,old_eb1)
+     +oc_decode_eob_token(old_tok2,old_eb2);
+    /*We can't possibly combine these into one run.
+      It might be possible to split them more optimally, but we'll just leave
+       them as-is.*/
+    if(run_count>=4096)continue;
+    /*We CAN combine them into one run.*/
+    new_tok=oc_make_eob_token_full(run_count,&new_eb);
+    _enc->dct_tokens[plj][zzj][ti]=(unsigned char)new_tok;
+    _enc->extra_bits[plj][zzj][ti]=(ogg_uint16_t)new_eb;
+    _enc->dct_token_offs[pli][zzi]++;
+  }
+}