summaryrefslogtreecommitdiff
path: root/thirdparty/libtheora/collect.c
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/libtheora/collect.c')
-rw-r--r--thirdparty/libtheora/collect.c974
1 files changed, 974 insertions, 0 deletions
diff --git a/thirdparty/libtheora/collect.c b/thirdparty/libtheora/collect.c
new file mode 100644
index 0000000000..c0d8a2733f
--- /dev/null
+++ b/thirdparty/libtheora/collect.c
@@ -0,0 +1,974 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2011 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: mode selection code
+ last mod: $Id$
+
+ ********************************************************************/
+#include <stdio.h>
+#include <limits.h>
+#include <math.h>
+#include <string.h>
+#include "collect.h"
+
+#if defined(OC_COLLECT_METRICS)
+
+int OC_HAS_MODE_METRICS;
+double OC_MODE_RD_WEIGHT_SATD[OC_LOGQ_BINS][3][2][OC_COMP_BINS];
+double OC_MODE_RD_WEIGHT_SAD[OC_LOGQ_BINS][3][2][OC_COMP_BINS];
+oc_mode_metrics OC_MODE_METRICS_SATD[OC_LOGQ_BINS-1][3][2][OC_COMP_BINS];
+oc_mode_metrics OC_MODE_METRICS_SAD[OC_LOGQ_BINS-1][3][2][OC_COMP_BINS];
+const char *OC_MODE_METRICS_FILENAME="modedec.stats";
+
+void oc_mode_metrics_add(oc_mode_metrics *_metrics,
+ double _w,int _s,int _q,int _r,double _d){
+ if(_metrics->w>0){
+ double ds;
+ double dq;
+ double dr;
+ double dd;
+ double ds2;
+ double dq2;
+ double s2;
+ double sq;
+ double q2;
+ double sr;
+ double qr;
+ double sd;
+ double qd;
+ double s2q;
+ double sq2;
+ double w;
+ double wa;
+ double rwa;
+ double rwa2;
+ double rwb;
+ double rwb2;
+ double rw2;
+ double rw3;
+ double rw4;
+ wa=_metrics->w;
+ ds=_s-_metrics->s/wa;
+ dq=_q-_metrics->q/wa;
+ dr=_r-_metrics->r/wa;
+ dd=_d-_metrics->d/wa;
+ ds2=ds*ds;
+ dq2=dq*dq;
+ s2=_metrics->s2;
+ sq=_metrics->sq;
+ q2=_metrics->q2;
+ sr=_metrics->sr;
+ qr=_metrics->qr;
+ sd=_metrics->sd;
+ qd=_metrics->qd;
+ s2q=_metrics->s2q;
+ sq2=_metrics->sq2;
+ w=wa+_w;
+ rwa=wa/w;
+ rwb=_w/w;
+ rwa2=rwa*rwa;
+ rwb2=rwb*rwb;
+ rw2=wa*rwb;
+ rw3=rw2*(rwa2-rwb2);
+ rw4=_w*rwa2*rwa2+wa*rwb2*rwb2;
+ _metrics->s2q2+=-2*(ds*sq2+dq*s2q)*rwb
+ +(ds2*q2+4*ds*dq*sq+dq2*s2)*rwb2+ds2*dq2*rw4;
+ _metrics->s2q+=(-2*ds*sq-dq*s2)*rwb+ds2*dq*rw3;
+ _metrics->sq2+=(-ds*q2-2*dq*sq)*rwb+ds*dq2*rw3;
+ _metrics->sqr+=(-ds*qr-dq*sr-dr*sq)*rwb+ds*dq*dr*rw3;
+ _metrics->sqd+=(-ds*qd-dq*sd-dd*sq)*rwb+ds*dq*dd*rw3;
+ _metrics->s2+=ds2*rw2;
+ _metrics->sq+=ds*dq*rw2;
+ _metrics->q2+=dq2*rw2;
+ _metrics->sr+=ds*dr*rw2;
+ _metrics->qr+=dq*dr*rw2;
+ _metrics->r2+=dr*dr*rw2;
+ _metrics->sd+=ds*dd*rw2;
+ _metrics->qd+=dq*dd*rw2;
+ _metrics->d2+=dd*dd*rw2;
+ }
+ _metrics->w+=_w;
+ _metrics->s+=_s*_w;
+ _metrics->q+=_q*_w;
+ _metrics->r+=_r*_w;
+ _metrics->d+=_d*_w;
+}
+
+void oc_mode_metrics_merge(oc_mode_metrics *_dst,
+ const oc_mode_metrics *_src,int _n){
+ int i;
+ /*Find a non-empty set of metrics.*/
+ for(i=0;i<_n&&_src[i].w==0;i++);
+ if(i>=_n){
+ memset(_dst,0,sizeof(*_dst));
+ return;
+ }
+ memcpy(_dst,_src+i,sizeof(*_dst));
+ /*And iterate over the remaining non-empty sets of metrics.*/
+ for(i++;i<_n;i++)if(_src[i].w!=0){
+ double ds;
+ double dq;
+ double dr;
+ double dd;
+ double ds2;
+ double dq2;
+ double s2a;
+ double s2b;
+ double sqa;
+ double sqb;
+ double q2a;
+ double q2b;
+ double sra;
+ double srb;
+ double qra;
+ double qrb;
+ double sda;
+ double sdb;
+ double qda;
+ double qdb;
+ double s2qa;
+ double s2qb;
+ double sq2a;
+ double sq2b;
+ double w;
+ double wa;
+ double wb;
+ double rwa;
+ double rwb;
+ double rwa2;
+ double rwb2;
+ double rw2;
+ double rw3;
+ double rw4;
+ wa=_dst->w;
+ wb=_src[i].w;
+ ds=_src[i].s/wb-_dst->s/wa;
+ dq=_src[i].q/wb-_dst->q/wa;
+ dr=_src[i].r/wb-_dst->r/wa;
+ dd=_src[i].d/wb-_dst->d/wa;
+ ds2=ds*ds;
+ dq2=dq*dq;
+ s2a=_dst->s2;
+ sqa=_dst->sq;
+ q2a=_dst->q2;
+ sra=_dst->sr;
+ qra=_dst->qr;
+ sda=_dst->sd;
+ qda=_dst->qd;
+ s2qa=_dst->s2q;
+ sq2a=_dst->sq2;
+ s2b=_src[i].s2;
+ sqb=_src[i].sq;
+ q2b=_src[i].q2;
+ srb=_src[i].sr;
+ qrb=_src[i].qr;
+ sdb=_src[i].sd;
+ qdb=_src[i].qd;
+ s2qb=_src[i].s2q;
+ sq2b=_src[i].sq2;
+ w=wa+wb;
+ if(w==0)rwa=rwb=0;
+ else{
+ rwa=wa/w;
+ rwb=wb/w;
+ }
+ rwa2=rwa*rwa;
+ rwb2=rwb*rwb;
+ rw2=wa*rwb;
+ rw3=rw2*(rwa2-rwb2);
+ rw4=wb*rwa2*rwa2+wa*rwb2*rwb2;
+ /*
+ (1,1,1) ->
+ (0,0,0)#
+ (1,0,0) C(1,1)*C(1,0)*C(1,0)-> d^{1,0,0}*(rwa*B_{0,1,1}-rwb*A_{0,1,1})
+ (0,1,0) C(1,0)*C(1,1)*C(1,0)-> d^{0,1,0}*(rwa*B_{1,0,1}-rwb*A_{1,0,1})
+ (0,0,1) C(1,0)*C(1,0)*C(1,1)-> d^{0,0,1}*(rwa*B_{1,1,0}-rwb*A_{1,1,0})
+ (1,1,0)*
+ (1,0,1)*
+ (0,1,1)*
+ (1,1,1) C(1,1)*C(1,1)*C(1,1)-> d^{1,1,1}*(rwa^3*wb-rwb^3*wa)
+ (2,1) ->
+ (0,0)#
+ (1,0) C(2,1)*C(1,1)->2*d^{1,0}*(rwa*B_{1,1}-rwb*A_{1,1})
+ (0,1) C(2,0)*C(1,1)-> d^{0,1}*(rwa*B_{2,0}-rwb*A_{2,0})
+ (2,0)*
+ (1,1)*
+ (2,1) C(2,2)*C(1,1)-> d^{2,1}*(rwa^3*wb-rwb^3*wa)
+ (2,2) ->
+ (0,0)#
+ (1,0) C(2,1)*C(2,0)->2*d^{1,0}*(rwa*B_{1,2}-rwb*A_{1,2})
+ (0,1) C(2,0)*C(2,1)->2*d^{0,1}*(rwa*B_{2,1}-rwb*A_{2,1})
+ (2,0) C(2,2)*C(2,0)-> d^{2,0}*(rwa^2*B_{0,2}+rwb^2*A_{0,2})
+ (1,1) C(2,1)*C(2,1)->4*d^{1,1}*(rwa^2*B_{1,1}+rwb^2*A_{1,1})
+ (0,2) C(2,0)*C(2,2)-> d^{0,2}*(rwa^2*B_{2,0}+rwb^2*A_{2,0})
+ (1,2)*
+ (2,1)*
+ (2,2) C(2,2)*C(2,2)*d^{2,2}*(rwa^4*wb+rwb^4*wa)
+ */
+ _dst->s2q2+=_src[i].s2q2+2*(ds*(rwa*sq2b-rwb*sq2a)+dq*(rwa*s2qb-rwb*s2qa))
+ +ds2*(rwa2*q2b+rwb2*q2a)+4*ds*dq*(rwa2*sqb+rwb2*sqa)
+ +dq2*(rwa2*s2b+rwb2*s2a)+ds2*dq2*rw4;
+ _dst->s2q+=_src[i].s2q+2*ds*(rwa*sqb-rwb*sqa)
+ +dq*(rwa*s2b-rwb*s2a)+ds2*dq*rw3;
+ _dst->sq2+=_src[i].sq2+ds*(rwa*q2b-rwb*q2a)
+ +2*dq*(rwa*sqb-rwb*sqa)+ds*dq2*rw3;
+ _dst->sqr+=_src[i].sqr+ds*(rwa*qrb-rwb*qra)+dq*(rwa*srb-rwb*sra)
+ +dr*(rwa*sqb-rwb*sqa)+ds*dq*dr*rw3;
+ _dst->sqd+=_src[i].sqd+ds*(rwa*qdb-rwb*qda)+dq*(rwa*sdb-rwb*sda)
+ +dd*(rwa*sqb-rwb*sqa)+ds*dq*dd*rw3;
+ _dst->s2+=_src[i].s2+ds2*rw2;
+ _dst->sq+=_src[i].sq+ds*dq*rw2;
+ _dst->q2+=_src[i].q2+dq2*rw2;
+ _dst->sr+=_src[i].sr+ds*dr*rw2;
+ _dst->qr+=_src[i].qr+dq*dr*rw2;
+ _dst->r2+=_src[i].r2+dr*dr*rw2;
+ _dst->sd+=_src[i].sd+ds*dd*rw2;
+ _dst->qd+=_src[i].qd+dq*dd*rw2;
+ _dst->d2+=_src[i].d2+dd*dd*rw2;
+ _dst->w+=_src[i].w;
+ _dst->s+=_src[i].s;
+ _dst->q+=_src[i].q;
+ _dst->r+=_src[i].r;
+ _dst->d+=_src[i].d;
+ }
+}
+
+/*Adjust a single corner of a set of metric bins to minimize the squared
+ prediction error of R and D.
+ Each bin is assumed to cover a quad like so:
+ (s0,q0) (s1,q0)
+ A----------B
+ | |
+ | |
+ | |
+ | |
+ C----------Z
+ (s0,q1) (s1,q1)
+ The values A, B, and C are fixed, and Z is the free parameter.
+ Then, for example, R_i is predicted via bilinear interpolation as
+ x_i=(s_i-s0)/(s1-s0)
+ y_i=(q_i-q0)/(q1-q0)
+ dRds1_i=A+(B-A)*x_i
+ dRds2_i=C+(Z-C)*x_i
+ R_i=dRds1_i+(dRds2_i-dRds1_i)*y_i
+ To find the Z that minimizes the squared prediction error over i, this can
+ be rewritten as
+ R_i-(A+(B-A)*x_i+(C-A)*y_i+(A-B-C)*x_i*y_i)=x_i*y_i*Z
+ Letting X={...,x_i*y_i,...}^T and
+ Y={...,R_i-(A+(B-A)*x_i+(C-A)*y_i+(A-B-C)*x_i*y_i),...}^T,
+ the optimal Z is given by Z=(X^T.Y)/(X^T.X).
+ Now, we need to compute these dot products without actually storing data for
+ each sample.
+ Starting with X^T.X, we have
+ X^T.X = sum(x_i^2*y_i^2) = sum((s_i-s0)^2*(q_i-q0)^2)/((s1-s0)^2*(q1-q0)^2).
+ Expanding the interior of the sum in a monomial basis of s_i and q_i gives
+ s0^2*q0^2 *(1)
+ -2*s0*q0^2*(s_i)
+ -2*s0^2*q0*(q_i)
+ +q0^2 *(s_i^2)
+ +4*s0*q0 *(s_i*q_i)
+ +s0^2 *(q_i^2)
+ -2*q0 *(s_i^2*q_i)
+ -2*s0 *(s_i*q_i^2)
+ +1 *(s_i^2*q_i^2).
+ However, computing things directly in this basis leads to gross numerical
+ errors, as most of the terms will have similar size and destructive
+ cancellation results.
+ A much better basis is the central (co-)moment basis:
+ {1,s_i-sbar,q_i-qbar,(s_i-sbar)^2,(s_i-sbar)*(q_i-qbar),(q_i-qbar)^2,
+ (s_i-sbar)^2*(q_i-qbar),(s_i-sbar)*(q_i-qbar)^2,(s_i-sbar)^2*(q_i-qbar)^2},
+ where sbar and qbar are the average s and q values over the bin,
+ respectively.
+ In that basis, letting ds=sbar-s0 and dq=qbar-q0, (s_i-s0)^2*(q_i-q0)^2 is
+ ds^2*dq^2*(1)
+ +dq^2 *((s_i-sbar)^2)
+ +4*ds*dq*((s_i-sbar)*(q_i-qbar))
+ +ds^2 *((q_i-qbar)^2)
+ +2*dq *((s_i-sbar)^2*(q_i-qbar))
+ +2*ds *((s_i-sbar)*(q_i-qbar)^2)
+ +1 *((s_i-sbar)^2*(q_i-qbar)^2).
+ With these expressions in the central (co-)moment bases, all we need to do
+ is compute sums over the (co-)moment terms, which can be done
+ incrementally (see oc_mode_metrics_add() and oc_mode_metrics_merge()),
+ with no need to store the individual samples.
+ Now, for X^T.Y, we have
+ X^T.Y = sum((R_i-A-((B-A)/(s1-s0))*(s_i-s0)-((C-A)/(q1-q0))*(q_i-q0)
+ -((A-B-C)/((s1-s0)*(q1-q0)))*(s_i-s0)*(q_i-q0))*(s_i-s0)*(q_i-q0))/
+ ((s1-s0)*(q1-q0)),
+ or, rewriting the constants to simplify notation,
+ X^T.Y = sum((C0+C1*(s_i-s0)+C2*(q_i-q0)
+ +C3*(s_i-s0)*(q_i-q0)+R_i)*(s_i-s0)*(q_i-q0))/((s1-s0)*(q1-q0)).
+ Again, converting to the central (co-)moment basis, the interior of the
+ above sum is
+ ds*dq*(rbar+C0+C1*ds+C2*dq+C3*ds*dq) *(1)
+ +(C1*dq+C3*dq^2) *((s_i-sbar)^2)
+ +(rbar+C0+2*C1*ds+2*C2*dq+4*C3*ds*dq)*((s_i-sbar)*(q_i-qbar))
+ +(C2*ds+C3*ds^2) *((q_i-qbar)^2)
+ +dq *((s_i-sbar)*(r_i-rbar))
+ +ds *((q_i-qbar)*(r_i-rbar))
+ +(C1+2*C3*dq) *((s_i-sbar)^2*(q_i-qbar))
+ +(C2+2*C3*ds) *((s_i-sbar)*(q_i-qbar)^2)
+ +1 *((s_i-sbar)*(q_i-qbar)*(r_i-rbar))
+ +C3 *((s_i-sbar)^2*(q_i-qbar)^2).
+ You might think it would be easier (if perhaps slightly less robust) to
+ accumulate terms directly around s0 and q0.
+ However, we update each corner of the bins in turn, so we would have to
+ change basis to move the sums from corner to corner anyway.*/
+double oc_mode_metrics_solve(double *_r,double *_d,
+ const oc_mode_metrics *_metrics,const int *_s0,const int *_s1,
+ const int *_q0,const int *_q1,
+ const double *_ra,const double *_rb,const double *_rc,
+ const double *_da,const double *_db,const double *_dc,int _n){
+ double xx;
+ double rxy;
+ double dxy;
+ double wt;
+ int i;
+ xx=rxy=dxy=wt=0;
+ for(i=0;i<_n;i++)if(_metrics[i].w>0){
+ double s10;
+ double q10;
+ double sq10;
+ double ds;
+ double dq;
+ double ds2;
+ double dq2;
+ double r;
+ double d;
+ double s2;
+ double sq;
+ double q2;
+ double sr;
+ double qr;
+ double sd;
+ double qd;
+ double s2q;
+ double sq2;
+ double sqr;
+ double sqd;
+ double s2q2;
+ double c0;
+ double c1;
+ double c2;
+ double c3;
+ double w;
+ w=_metrics[i].w;
+ wt+=w;
+ s10=_s1[i]-_s0[i];
+ q10=_q1[i]-_q0[i];
+ sq10=s10*q10;
+ ds=_metrics[i].s/w-_s0[i];
+ dq=_metrics[i].q/w-_q0[i];
+ ds2=ds*ds;
+ dq2=dq*dq;
+ s2=_metrics[i].s2;
+ sq=_metrics[i].sq;
+ q2=_metrics[i].q2;
+ s2q=_metrics[i].s2q;
+ sq2=_metrics[i].sq2;
+ s2q2=_metrics[i].s2q2;
+ xx+=(dq2*(ds2*w+s2)+4*ds*dq*sq+ds2*q2+2*(dq*s2q+ds*sq2)+s2q2)/(sq10*sq10);
+ r=_metrics[i].r/w;
+ sr=_metrics[i].sr;
+ qr=_metrics[i].qr;
+ sqr=_metrics[i].sqr;
+ c0=-_ra[i];
+ c1=-(_rb[i]-_ra[i])/s10;
+ c2=-(_rc[i]-_ra[i])/q10;
+ c3=-(_ra[i]-_rb[i]-_rc[i])/sq10;
+ rxy+=(ds*dq*(r+c0+c1*ds+c2*dq+c3*ds*dq)*w+(c1*dq+c3*dq2)*s2
+ +(r+c0+2*(c1*ds+(c2+2*c3*ds)*dq))*sq+(c2*ds+c3*ds2)*q2+dq*sr+ds*qr
+ +(c1+2*c3*dq)*s2q+(c2+2*c3*ds)*sq2+sqr+c3*s2q2)/sq10;
+ d=_metrics[i].d/w;
+ sd=_metrics[i].sd;
+ qd=_metrics[i].qd;
+ sqd=_metrics[i].sqd;
+ c0=-_da[i];
+ c1=-(_db[i]-_da[i])/s10;
+ c2=-(_dc[i]-_da[i])/q10;
+ c3=-(_da[i]-_db[i]-_dc[i])/sq10;
+ dxy+=(ds*dq*(d+c0+c1*ds+c2*dq+c3*ds*dq)*w+(c1*dq+c3*dq2)*s2
+ +(d+c0+2*(c1*ds+(c2+2*c3*ds)*dq))*sq+(c2*ds+c3*ds2)*q2+dq*sd+ds*qd
+ +(c1+2*c3*dq)*s2q+(c2+2*c3*ds)*sq2+sqd+c3*s2q2)/sq10;
+ }
+ if(xx>1E-3){
+ *_r=rxy/xx;
+ *_d=dxy/xx;
+ }
+ else{
+ *_r=0;
+ *_d=0;
+ }
+ return wt;
+}
+
+/*Compile collected SATD/logq/rate/RMSE metrics into a form that's immediately
+ useful for mode decision.*/
+void oc_mode_metrics_update(oc_mode_metrics (*_metrics)[3][2][OC_COMP_BINS],
+ int _niters_min,int _reweight,oc_mode_rd (*_table)[3][2][OC_COMP_BINS],
+ int _shift,double (*_weight)[3][2][OC_COMP_BINS]){
+ int niters;
+ int prevdr;
+ int prevdd;
+ int dr;
+ int dd;
+ int pli;
+ int qti;
+ int qi;
+ int si;
+ dd=dr=INT_MAX;
+ niters=0;
+ /*The encoder interpolates rate and RMSE terms bilinearly from an
+ OC_LOGQ_BINS by OC_COMP_BINS grid of sample points in _table.
+ To find the sample values at the grid points that minimize the total
+ squared prediction error actually requires solving a relatively sparse
+ linear system with a number of variables equal to the number of grid
+ points.
+ Instead of writing a general sparse linear system solver, we just use
+ Gauss-Seidel iteration, i.e., we update one grid point at time until
+ they stop changing.*/
+ do{
+ prevdr=dr;
+ prevdd=dd;
+ dd=dr=0;
+ for(pli=0;pli<3;pli++){
+ for(qti=0;qti<2;qti++){
+ for(qi=0;qi<OC_LOGQ_BINS;qi++){
+ for(si=0;si<OC_COMP_BINS;si++){
+ oc_mode_metrics m[4];
+ int s0[4];
+ int s1[4];
+ int q0[4];
+ int q1[4];
+ double ra[4];
+ double rb[4];
+ double rc[4];
+ double da[4];
+ double db[4];
+ double dc[4];
+ double r;
+ double d;
+ int rate;
+ int rmse;
+ int ds;
+ int n;
+ n=0;
+ /*Collect the statistics for the (up to) four bins grid point
+ (si,qi) touches.*/
+ if(qi>0&&si>0){
+ q0[n]=OC_MODE_LOGQ[qi-1][pli][qti];
+ q1[n]=OC_MODE_LOGQ[qi][pli][qti];
+ s0[n]=si-1<<_shift;
+ s1[n]=si<<_shift;
+ ra[n]=ldexp(_table[qi-1][pli][qti][si-1].rate,-OC_BIT_SCALE);
+ da[n]=ldexp(_table[qi-1][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
+ rb[n]=ldexp(_table[qi-1][pli][qti][si].rate,-OC_BIT_SCALE);
+ db[n]=ldexp(_table[qi-1][pli][qti][si].rmse,-OC_RMSE_SCALE);
+ rc[n]=ldexp(_table[qi][pli][qti][si-1].rate,-OC_BIT_SCALE);
+ dc[n]=ldexp(_table[qi][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
+ *(m+n++)=*(_metrics[qi-1][pli][qti]+si-1);
+ }
+ if(qi>0){
+ ds=si+1<OC_COMP_BINS?1:-1;
+ q0[n]=OC_MODE_LOGQ[qi-1][pli][qti];
+ q1[n]=OC_MODE_LOGQ[qi][pli][qti];
+ s0[n]=si+ds<<_shift;
+ s1[n]=si<<_shift;
+ ra[n]=ldexp(_table[qi-1][pli][qti][si+ds].rate,-OC_BIT_SCALE);
+ da[n]=
+ ldexp(_table[qi-1][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
+ rb[n]=ldexp(_table[qi-1][pli][qti][si].rate,-OC_BIT_SCALE);
+ db[n]=ldexp(_table[qi-1][pli][qti][si].rmse,-OC_RMSE_SCALE);
+ rc[n]=ldexp(_table[qi][pli][qti][si+ds].rate,-OC_BIT_SCALE);
+ dc[n]=ldexp(_table[qi][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
+ *(m+n++)=*(_metrics[qi-1][pli][qti]+si);
+ }
+ if(qi+1<OC_LOGQ_BINS&&si>0){
+ q0[n]=OC_MODE_LOGQ[qi+1][pli][qti];
+ q1[n]=OC_MODE_LOGQ[qi][pli][qti];
+ s0[n]=si-1<<_shift;
+ s1[n]=si<<_shift;
+ ra[n]=ldexp(_table[qi+1][pli][qti][si-1].rate,-OC_BIT_SCALE);
+ da[n]=ldexp(_table[qi+1][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
+ rb[n]=ldexp(_table[qi+1][pli][qti][si].rate,-OC_BIT_SCALE);
+ db[n]=ldexp(_table[qi+1][pli][qti][si].rmse,-OC_RMSE_SCALE);
+ rc[n]=ldexp(_table[qi][pli][qti][si-1].rate,-OC_BIT_SCALE);
+ dc[n]=ldexp(_table[qi][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
+ *(m+n++)=*(_metrics[qi][pli][qti]+si-1);
+ }
+ if(qi+1<OC_LOGQ_BINS){
+ ds=si+1<OC_COMP_BINS?1:-1;
+ q0[n]=OC_MODE_LOGQ[qi+1][pli][qti];
+ q1[n]=OC_MODE_LOGQ[qi][pli][qti];
+ s0[n]=si+ds<<_shift;
+ s1[n]=si<<_shift;
+ ra[n]=ldexp(_table[qi+1][pli][qti][si+ds].rate,-OC_BIT_SCALE);
+ da[n]=
+ ldexp(_table[qi+1][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
+ rb[n]=ldexp(_table[qi+1][pli][qti][si].rate,-OC_BIT_SCALE);
+ db[n]=ldexp(_table[qi+1][pli][qti][si].rmse,-OC_RMSE_SCALE);
+ rc[n]=ldexp(_table[qi][pli][qti][si+ds].rate,-OC_BIT_SCALE);
+ dc[n]=ldexp(_table[qi][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
+ *(m+n++)=*(_metrics[qi][pli][qti]+si);
+ }
+ /*On the first pass, initialize with a simple weighted average of
+ the neighboring bins.*/
+ if(!OC_HAS_MODE_METRICS&&niters==0){
+ double w;
+ w=r=d=0;
+ while(n-->0){
+ w+=m[n].w;
+ r+=m[n].r;
+ d+=m[n].d;
+ }
+ r=w>1E-3?r/w:0;
+ d=w>1E-3?d/w:0;
+ _weight[qi][pli][qti][si]=w;
+ }
+ else{
+ /*Update the grid point and save the weight for later.*/
+ _weight[qi][pli][qti][si]=
+ oc_mode_metrics_solve(&r,&d,m,s0,s1,q0,q1,ra,rb,rc,da,db,dc,n);
+ }
+ rate=OC_CLAMPI(-32768,(int)(ldexp(r,OC_BIT_SCALE)+0.5),32767);
+ rmse=OC_CLAMPI(-32768,(int)(ldexp(d,OC_RMSE_SCALE)+0.5),32767);
+ dr+=abs(rate-_table[qi][pli][qti][si].rate);
+ dd+=abs(rmse-_table[qi][pli][qti][si].rmse);
+ _table[qi][pli][qti][si].rate=(ogg_int16_t)rate;
+ _table[qi][pli][qti][si].rmse=(ogg_int16_t)rmse;
+ }
+ }
+ }
+ }
+ }
+ /*After a fixed number of initial iterations, only iterate so long as the
+ total change is decreasing.
+ This ensures we don't oscillate forever, which is a danger, as all of our
+ results are rounded fairly coarsely.*/
+ while((dr>0||dd>0)&&(niters++<_niters_min||(dr<prevdr&&dd<prevdd)));
+ if(_reweight){
+ /*Now, reduce the values of the optimal solution until we get enough
+ samples in each bin to overcome the constant OC_ZWEIGHT factor.
+ This encourages sampling under-populated bins and prevents a single large
+ sample early on from discouraging coding in that bin ever again.*/
+ for(pli=0;pli<3;pli++){
+ for(qti=0;qti<2;qti++){
+ for(qi=0;qi<OC_LOGQ_BINS;qi++){
+ for(si=0;si<OC_COMP_BINS;si++){
+ double wt;
+ wt=_weight[qi][pli][qti][si];
+ wt/=OC_ZWEIGHT+wt;
+ _table[qi][pli][qti][si].rate=(ogg_int16_t)
+ (_table[qi][pli][qti][si].rate*wt+0.5);
+ _table[qi][pli][qti][si].rmse=(ogg_int16_t)
+ (_table[qi][pli][qti][si].rmse*wt+0.5);
+ }
+ }
+ }
+ }
+ }
+}
+
+/*Dump the in memory mode metrics to a file.
+ Note this data format isn't portable between different platforms.*/
+void oc_mode_metrics_dump(void){
+ FILE *fmetrics;
+ fmetrics=fopen(OC_MODE_METRICS_FILENAME,"wb");
+ if(fmetrics!=NULL){
+ (void)fwrite(OC_MODE_LOGQ,sizeof(OC_MODE_LOGQ),1,fmetrics);
+ (void)fwrite(OC_MODE_METRICS_SATD,sizeof(OC_MODE_METRICS_SATD),1,fmetrics);
+ (void)fwrite(OC_MODE_METRICS_SAD,sizeof(OC_MODE_METRICS_SAD),1,fmetrics);
+ fclose(fmetrics);
+ }
+}
+
+void oc_mode_metrics_print_rd(FILE *_fout,const char *_table_name,
+#if !defined(OC_COLLECT_METRICS)
+ const oc_mode_rd (*_mode_rd_table)[3][2][OC_COMP_BINS]){
+#else
+ oc_mode_rd (*_mode_rd_table)[3][2][OC_COMP_BINS]){
+#endif
+ int qii;
+ fprintf(_fout,
+ "# if !defined(OC_COLLECT_METRICS)\n"
+ "static const\n"
+ "# endif\n"
+ "oc_mode_rd %s[OC_LOGQ_BINS][3][2][OC_COMP_BINS]={\n",_table_name);
+ for(qii=0;qii<OC_LOGQ_BINS;qii++){
+ int pli;
+ fprintf(_fout," {\n");
+ for(pli=0;pli<3;pli++){
+ int qti;
+ fprintf(_fout," {\n");
+ for(qti=0;qti<2;qti++){
+ int bin;
+ int qi;
+ static const char *pl_names[3]={"Y'","Cb","Cr"};
+ static const char *qti_names[2]={"INTRA","INTER"};
+ qi=(63*qii+(OC_LOGQ_BINS-1>>1))/(OC_LOGQ_BINS-1);
+ fprintf(_fout," /*%s qi=%i %s*/\n",
+ pl_names[pli],qi,qti_names[qti]);
+ fprintf(_fout," {\n");
+ fprintf(_fout," ");
+ for(bin=0;bin<OC_COMP_BINS;bin++){
+ if(bin&&!(bin&0x3))fprintf(_fout,"\n ");
+ fprintf(_fout,"{%5i,%5i}",
+ _mode_rd_table[qii][pli][qti][bin].rate,
+ _mode_rd_table[qii][pli][qti][bin].rmse);
+ if(bin+1<OC_COMP_BINS)fprintf(_fout,",");
+ }
+ fprintf(_fout,"\n }");
+ if(qti<1)fprintf(_fout,",");
+ fprintf(_fout,"\n");
+ }
+ fprintf(_fout," }");
+ if(pli<2)fprintf(_fout,",");
+ fprintf(_fout,"\n");
+ }
+ fprintf(_fout," }");
+ if(qii+1<OC_LOGQ_BINS)fprintf(_fout,",");
+ fprintf(_fout,"\n");
+ }
+ fprintf(_fout,
+ "};\n"
+ "\n");
+}
+
+void oc_mode_metrics_print(FILE *_fout){
+ int qii;
+ fprintf(_fout,
+ "/*File generated by libtheora with OC_COLLECT_METRICS"
+ " defined at compile time.*/\n"
+ "#if !defined(_modedec_H)\n"
+ "# define _modedec_H (1)\n"
+ "# include \"encint.h\"\n"
+ "\n"
+ "\n"
+ "\n"
+ "/*The log of the average quantizer for each of the OC_MODE_RD table rows\n"
+ " (e.g., for the represented qi's, and each pli and qti), in Q10 format.\n"
+ " The actual statistics used by the encoder will be interpolated from\n"
+ " that table based on log_plq for the actual quantization matrix used.*/\n"
+ "# if !defined(OC_COLLECT_METRICS)\n"
+ "static const\n"
+ "# endif\n"
+ "ogg_int16_t OC_MODE_LOGQ[OC_LOGQ_BINS][3][2]={\n");
+ for(qii=0;qii<OC_LOGQ_BINS;qii++){
+ fprintf(_fout," { {0x%04X,0x%04X},{0x%04X,0x%04X},{0x%04X,0x%04X} }%s\n",
+ OC_MODE_LOGQ[qii][0][0],OC_MODE_LOGQ[qii][0][1],OC_MODE_LOGQ[qii][1][0],
+ OC_MODE_LOGQ[qii][1][1],OC_MODE_LOGQ[qii][2][0],OC_MODE_LOGQ[qii][2][1],
+ qii+1<OC_LOGQ_BINS?",":"");
+ }
+ fprintf(_fout,
+ "};\n"
+ "\n");
+ oc_mode_metrics_print_rd(_fout,"OC_MODE_RD_SATD",OC_MODE_RD_SATD);
+ oc_mode_metrics_print_rd(_fout,"OC_MODE_RD_SAD",OC_MODE_RD_SAD);
+ fprintf(_fout,
+ "#endif\n");
+}
+
+
+# if !defined(OC_COLLECT_NO_ENC_FUNCS)
+void oc_enc_mode_metrics_load(oc_enc_ctx *_enc){
+ oc_restore_fpu(&_enc->state);
+ /*Load any existing mode metrics if we haven't already.*/
+ if(!OC_HAS_MODE_METRICS){
+ FILE *fmetrics;
+ memset(OC_MODE_METRICS_SATD,0,sizeof(OC_MODE_METRICS_SATD));
+ memset(OC_MODE_METRICS_SAD,0,sizeof(OC_MODE_METRICS_SAD));
+ fmetrics=fopen(OC_MODE_METRICS_FILENAME,"rb");
+ if(fmetrics!=NULL){
+ /*Read in the binary structures as written my oc_mode_metrics_dump().
+ Note this format isn't portable between different platforms.*/
+ (void)fread(OC_MODE_LOGQ,sizeof(OC_MODE_LOGQ),1,fmetrics);
+ (void)fread(OC_MODE_METRICS_SATD,sizeof(OC_MODE_METRICS_SATD),1,fmetrics);
+ (void)fread(OC_MODE_METRICS_SAD,sizeof(OC_MODE_METRICS_SAD),1,fmetrics);
+ fclose(fmetrics);
+ }
+ else{
+ int qii;
+ int qi;
+ int pli;
+ int qti;
+ for(qii=0;qii<OC_LOGQ_BINS;qii++){
+ qi=(63*qii+(OC_LOGQ_BINS-1>>1))/(OC_LOGQ_BINS-1);
+ for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
+ OC_MODE_LOGQ[qii][pli][qti]=_enc->log_plq[qi][pli][qti];
+ }
+ }
+ }
+ oc_mode_metrics_update(OC_MODE_METRICS_SATD,100,1,
+ OC_MODE_RD_SATD,OC_SATD_SHIFT,OC_MODE_RD_WEIGHT_SATD);
+ oc_mode_metrics_update(OC_MODE_METRICS_SAD,100,1,
+ OC_MODE_RD_SAD,OC_SAD_SHIFT,OC_MODE_RD_WEIGHT_SAD);
+ OC_HAS_MODE_METRICS=1;
+ }
+}
+
+/*The following token skipping code used to also be used in the decoder (and
+ even at one point other places in the encoder).
+ However, it was obsoleted by other optimizations, and is now only used here.
+ It has been moved here to avoid generating the code when it's not needed.*/
+
+/*Determines the number of blocks or coefficients to be skipped for a given
+ token value.
+ _token: The token value to skip.
+ _extra_bits: The extra bits attached to this token.
+ Return: A positive value indicates that number of coefficients are to be
+ skipped in the current block.
+ Otherwise, the negative of the return value indicates that number of
+ blocks are to be ended.*/
+typedef ptrdiff_t (*oc_token_skip_func)(int _token,int _extra_bits);
+
+/*Handles the simple end of block tokens.*/
+static ptrdiff_t oc_token_skip_eob(int _token,int _extra_bits){
+ int nblocks_adjust;
+ nblocks_adjust=OC_UNIBBLE_TABLE32(0,1,2,3,7,15,0,0,_token)+1;
+ return -_extra_bits-nblocks_adjust;
+}
+
+/*The last EOB token has a special case, where an EOB run of size zero ends all
+ the remaining blocks in the frame.*/
+static ptrdiff_t oc_token_skip_eob6(int _token,int _extra_bits){
+ /*Note: We want to return -PTRDIFF_MAX, but that requires C99, which is not
+ yet available everywhere; this should be equivalent.*/
+ if(!_extra_bits)return -(~(size_t)0>>1);
+ return -_extra_bits;
+}
+
+/*Handles the pure zero run tokens.*/
+static ptrdiff_t oc_token_skip_zrl(int _token,int _extra_bits){
+ return _extra_bits+1;
+}
+
+/*Handles a normal coefficient value token.*/
+static ptrdiff_t oc_token_skip_val(void){
+ return 1;
+}
+
+/*Handles a category 1A zero run/coefficient value combo token.*/
+static ptrdiff_t oc_token_skip_run_cat1a(int _token){
+ return _token-OC_DCT_RUN_CAT1A+2;
+}
+
+/*Handles category 1b, 1c, 2a, and 2b zero run/coefficient value combo tokens.*/
+static ptrdiff_t oc_token_skip_run(int _token,int _extra_bits){
+ int run_cati;
+ int ncoeffs_mask;
+ int ncoeffs_adjust;
+ run_cati=_token-OC_DCT_RUN_CAT1B;
+ ncoeffs_mask=OC_BYTE_TABLE32(3,7,0,1,run_cati);
+ ncoeffs_adjust=OC_BYTE_TABLE32(7,11,2,3,run_cati);
+ return (_extra_bits&ncoeffs_mask)+ncoeffs_adjust;
+}
+
+/*A jump table for computing the number of coefficients or blocks to skip for
+ a given token value.
+ This reduces all the conditional branches, etc., needed to parse these token
+ values down to one indirect jump.*/
+static const oc_token_skip_func OC_TOKEN_SKIP_TABLE[TH_NDCT_TOKENS]={
+ oc_token_skip_eob,
+ oc_token_skip_eob,
+ oc_token_skip_eob,
+ oc_token_skip_eob,
+ oc_token_skip_eob,
+ oc_token_skip_eob,
+ oc_token_skip_eob6,
+ oc_token_skip_zrl,
+ oc_token_skip_zrl,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_run_cat1a,
+ (oc_token_skip_func)oc_token_skip_run_cat1a,
+ (oc_token_skip_func)oc_token_skip_run_cat1a,
+ (oc_token_skip_func)oc_token_skip_run_cat1a,
+ (oc_token_skip_func)oc_token_skip_run_cat1a,
+ oc_token_skip_run,
+ oc_token_skip_run,
+ oc_token_skip_run,
+ oc_token_skip_run
+};
+
+/*Determines the number of blocks or coefficients to be skipped for a given
+ token value.
+ _token: The token value to skip.
+ _extra_bits: The extra bits attached to this token.
+ Return: A positive value indicates that number of coefficients are to be
+ skipped in the current block.
+ Otherwise, the negative of the return value indicates that number of
+ blocks are to be ended.
+ 0 will never be returned, so that at least one coefficient in one
+ block will always be decoded for every token.*/
+static ptrdiff_t oc_dct_token_skip(int _token,int _extra_bits){
+ return (*OC_TOKEN_SKIP_TABLE[_token])(_token,_extra_bits);
+}
+
+
+void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc){
+ static const unsigned char OC_ZZI_HUFF_OFFSET[64]={
+ 0,16,16,16,16,16,32,32,
+ 32,32,32,32,32,32,32,48,
+ 48,48,48,48,48,48,48,48,
+ 48,48,48,48,64,64,64,64,
+ 64,64,64,64,64,64,64,64,
+ 64,64,64,64,64,64,64,64,
+ 64,64,64,64,64,64,64,64
+ };
+ const oc_fragment *frags;
+ const unsigned *frag_sad;
+ const unsigned *frag_satd;
+ const unsigned *frag_ssd;
+ const ptrdiff_t *coded_fragis;
+ ptrdiff_t ncoded_fragis;
+ ptrdiff_t fragii;
+ double fragw;
+ int modelines[3][3][2];
+ int qti;
+ int qii;
+ int qi;
+ int pli;
+ int zzi;
+ int token;
+ int eb;
+ oc_restore_fpu(&_enc->state);
+ /*Figure out which metric bins to use for this frame's quantizers.*/
+ for(qii=0;qii<_enc->state.nqis;qii++){
+ for(pli=0;pli<3;pli++){
+ for(qti=0;qti<2;qti++){
+ int log_plq;
+ int modeline;
+ log_plq=_enc->log_plq[_enc->state.qis[qii]][pli][qti];
+ for(modeline=0;modeline<OC_LOGQ_BINS-1&&
+ OC_MODE_LOGQ[modeline+1][pli][qti]>log_plq;modeline++);
+ modelines[qii][pli][qti]=modeline;
+ }
+ }
+ }
+ qti=_enc->state.frame_type;
+ frags=_enc->state.frags;
+ frag_sad=_enc->frag_sad;
+ frag_satd=_enc->frag_satd;
+ frag_ssd=_enc->frag_ssd;
+ coded_fragis=_enc->state.coded_fragis;
+ ncoded_fragis=fragii=0;
+ /*Weight the fragments by the inverse frame size; this prevents HD content
+ from dominating the statistics.*/
+ fragw=1.0/_enc->state.nfrags;
+ for(pli=0;pli<3;pli++){
+ ptrdiff_t ti[64];
+ int eob_token[64];
+ int eob_run[64];
+ /*Set up token indices and eob run counts.
+ We don't bother trying to figure out the real cost of the runs that span
+ coefficients; instead we use the costs that were available when R-D
+ token optimization was done.*/
+ for(zzi=0;zzi<64;zzi++){
+ ti[zzi]=_enc->dct_token_offs[pli][zzi];
+ if(ti[zzi]>0){
+ token=_enc->dct_tokens[pli][zzi][0];
+ eb=_enc->extra_bits[pli][zzi][0];
+ eob_token[zzi]=token;
+ eob_run[zzi]=-oc_dct_token_skip(token,eb);
+ }
+ else{
+ eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
+ eob_run[zzi]=0;
+ }
+ }
+ /*Scan the list of coded fragments for this plane.*/
+ ncoded_fragis+=_enc->state.ncoded_fragis[pli];
+ for(;fragii<ncoded_fragis;fragii++){
+ ptrdiff_t fragi;
+ int frag_bits;
+ int huffi;
+ int skip;
+ int mb_mode;
+ unsigned sad;
+ unsigned satd;
+ double sqrt_ssd;
+ int bin;
+ int qtj;
+ fragi=coded_fragis[fragii];
+ frag_bits=0;
+ for(zzi=0;zzi<64;){
+ if(eob_run[zzi]>0){
+ /*We've reached the end of the block.*/
+ eob_run[zzi]--;
+ break;
+ }
+ huffi=_enc->huff_idxs[qti][zzi>0][pli+1>>1]
+ +OC_ZZI_HUFF_OFFSET[zzi];
+ if(eob_token[zzi]<OC_NDCT_EOB_TOKEN_MAX){
+ /*This token caused an EOB run to be flushed.
+ Therefore it gets the bits associated with it.*/
+ frag_bits+=_enc->huff_codes[huffi][eob_token[zzi]].nbits
+ +OC_DCT_TOKEN_EXTRA_BITS[eob_token[zzi]];
+ eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
+ }
+ token=_enc->dct_tokens[pli][zzi][ti[zzi]];
+ eb=_enc->extra_bits[pli][zzi][ti[zzi]];
+ ti[zzi]++;
+ skip=oc_dct_token_skip(token,eb);
+ if(skip<0){
+ eob_token[zzi]=token;
+ eob_run[zzi]=-skip;
+ }
+ else{
+ /*A regular DCT value token; accumulate the bits for it.*/
+ frag_bits+=_enc->huff_codes[huffi][token].nbits
+ +OC_DCT_TOKEN_EXTRA_BITS[token];
+ zzi+=skip;
+ }
+ }
+ mb_mode=frags[fragi].mb_mode;
+ qii=frags[fragi].qii;
+ qi=_enc->state.qis[qii];
+ sad=frag_sad[fragi]<<(pli+1&2);
+ satd=frag_satd[fragi]<<(pli+1&2);
+ sqrt_ssd=sqrt(frag_ssd[fragi]);
+ qtj=mb_mode!=OC_MODE_INTRA;
+ /*Accumulate statistics.
+ The rate (frag_bits) and RMSE (sqrt(frag_ssd)) are not scaled by
+ OC_BIT_SCALE and OC_RMSE_SCALE; this lets us change the scale factor
+ yet still use old data.*/
+ bin=OC_MINI(satd>>OC_SATD_SHIFT,OC_COMP_BINS-1);
+ oc_mode_metrics_add(
+ OC_MODE_METRICS_SATD[modelines[qii][pli][qtj]][pli][qtj]+bin,
+ fragw,satd,_enc->log_plq[qi][pli][qtj],frag_bits,sqrt_ssd);
+ bin=OC_MINI(sad>>OC_SAD_SHIFT,OC_COMP_BINS-1);
+ oc_mode_metrics_add(
+ OC_MODE_METRICS_SAD[modelines[qii][pli][qtj]][pli][qtj]+bin,
+ fragw,sad,_enc->log_plq[qi][pli][qtj],frag_bits,sqrt_ssd);
+ }
+ }
+ /*Update global SA(T)D/logq/rate/RMSE estimation matrix.*/
+ oc_mode_metrics_update(OC_MODE_METRICS_SATD,4,1,
+ OC_MODE_RD_SATD,OC_SATD_SHIFT,OC_MODE_RD_WEIGHT_SATD);
+ oc_mode_metrics_update(OC_MODE_METRICS_SAD,4,1,
+ OC_MODE_RD_SAD,OC_SAD_SHIFT,OC_MODE_RD_WEIGHT_SAD);
+}
+# endif
+
+#endif