diff options
Diffstat (limited to 'thirdparty/libtheora/enquant.c')
-rw-r--r-- | thirdparty/libtheora/enquant.c | 198 |
1 files changed, 147 insertions, 51 deletions
diff --git a/thirdparty/libtheora/enquant.c b/thirdparty/libtheora/enquant.c index 3372fed221..8fd220edd7 100644 --- a/thirdparty/libtheora/enquant.c +++ b/thirdparty/libtheora/enquant.c @@ -11,7 +11,7 @@ ******************************************************************** function: - last mod: $Id: enquant.c 16503 2009-08-22 18:14:02Z giles $ + last mod: $Id$ ********************************************************************/ #include <stdlib.h> @@ -20,6 +20,69 @@ +int oc_quant_params_clone(th_quant_info *_dst,const th_quant_info *_src){ + int i; + memcpy(_dst,_src,sizeof(*_dst)); + memset(_dst->qi_ranges,0,sizeof(_dst->qi_ranges)); + for(i=0;i<6;i++){ + int nranges; + int qti; + int pli; + int qtj; + int plj; + int pdup; + int qdup; + qti=i/3; + pli=i%3; + qtj=(i-1)/3; + plj=(i-1)%3; + nranges=_src->qi_ranges[qti][pli].nranges; + /*Check for those duplicates that can be cleanly handled by + oc_quant_params_clear().*/ + pdup=i>0&&nranges<=_src->qi_ranges[qtj][plj].nranges; + qdup=qti>0&&nranges<=_src->qi_ranges[0][pli].nranges; + _dst->qi_ranges[qti][pli].nranges=nranges; + if(pdup&&_src->qi_ranges[qti][pli].sizes==_src->qi_ranges[qtj][plj].sizes){ + _dst->qi_ranges[qti][pli].sizes=_dst->qi_ranges[qtj][plj].sizes; + } + else if(qdup&&_src->qi_ranges[1][pli].sizes==_src->qi_ranges[0][pli].sizes){ + _dst->qi_ranges[1][pli].sizes=_dst->qi_ranges[0][pli].sizes; + } + else{ + int *sizes; + sizes=(int *)_ogg_malloc(nranges*sizeof(*sizes)); + /*Note: The caller is responsible for cleaning up any partially + constructed qinfo.*/ + if(sizes==NULL)return TH_EFAULT; + memcpy(sizes,_src->qi_ranges[qti][pli].sizes,nranges*sizeof(*sizes)); + _dst->qi_ranges[qti][pli].sizes=sizes; + } + if(pdup&&_src->qi_ranges[qti][pli].base_matrices== + _src->qi_ranges[qtj][plj].base_matrices){ + _dst->qi_ranges[qti][pli].base_matrices= + _dst->qi_ranges[qtj][plj].base_matrices; + } + else if(qdup&&_src->qi_ranges[1][pli].base_matrices== + _src->qi_ranges[0][pli].base_matrices){ + _dst->qi_ranges[1][pli].base_matrices= + _dst->qi_ranges[0][pli].base_matrices; + } + else{ + th_quant_base *base_matrices; + base_matrices=(th_quant_base *)_ogg_malloc( + (nranges+1)*sizeof(*base_matrices)); + /*Note: The caller is responsible for cleaning up any partially + constructed qinfo.*/ + if(base_matrices==NULL)return TH_EFAULT; + memcpy(base_matrices,_src->qi_ranges[qti][pli].base_matrices, + (nranges+1)*sizeof(*base_matrices)); + _dst->qi_ranges[qti][pli].base_matrices= + (const th_quant_base *)base_matrices; + } + } + return 0; +} + void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){ const th_quant_ranges *qranges; const th_quant_base *base_mats[2*3*64]; @@ -119,7 +182,7 @@ void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){ } } -static void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d){ +void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d){ ogg_uint32_t t; int l; _d<<=1; @@ -129,48 +192,61 @@ static void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d){ _this->l=l; } -/*See comments at oc_dequant_tables_init() for how the quantization tables' - storage should be initialized.*/ -void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2], - oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo){ - int qi; +void oc_enc_enquant_table_init_c(void *_enquant, + const ogg_uint16_t _dequant[64]){ + oc_iquant *enquant; + int zzi; + /*In the original VP3.2 code, the rounding offset and the size of the + dead zone around 0 were controlled by a "sharpness" parameter. + We now R-D optimize the tokens for each block after quantization, + so the rounding offset should always be 1/2, and an explicit dead + zone is unnecessary. + Hence, all of that VP3.2 code is gone from here, and the remaining + floating point code has been implemented as equivalent integer + code with exact precision.*/ + enquant=(oc_iquant *)_enquant; + for(zzi=0;zzi<64;zzi++)oc_iquant_init(enquant+zzi,_dequant[zzi]); +} + +void oc_enc_enquant_table_fixup_c(void *_enquant[3][3][2],int _nqis){ int pli; + int qii; int qti; - /*Initialize the dequantization tables first.*/ - oc_dequant_tables_init(_dequant,NULL,_qinfo); - /*Derive the quantization tables directly from the dequantization tables.*/ - for(qi=0;qi<64;qi++)for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ - int zzi; - int plj; - int qtj; - int dupe; - dupe=0; - for(qtj=0;qtj<=qti;qtj++){ - for(plj=0;plj<(qtj<qti?3:pli);plj++){ - if(_dequant[qi][pli][qti]==_dequant[qi][plj][qtj]){ - dupe=1; - break; - } - } - if(dupe)break; - } - if(dupe){ - _enquant[qi][pli][qti]=_enquant[qi][plj][qtj]; - continue; - } - /*In the original VP3.2 code, the rounding offset and the size of the - dead zone around 0 were controlled by a "sharpness" parameter. - We now R-D optimize the tokens for each block after quantization, - so the rounding offset should always be 1/2, and an explicit dead - zone is unnecessary. - Hence, all of that VP3.2 code is gone from here, and the remaining - floating point code has been implemented as equivalent integer - code with exact precision.*/ - for(zzi=0;zzi<64;zzi++){ - oc_iquant_init(_enquant[qi][pli][qti]+zzi, - _dequant[qi][pli][qti][zzi]); + for(pli=0;pli<3;pli++)for(qii=1;qii<_nqis;qii++)for(qti=0;qti<2;qti++){ + *((oc_iquant *)_enquant[pli][qii][qti])= + *((oc_iquant *)_enquant[pli][0][qti]); + } +} + +int oc_enc_quantize_c(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64], + const ogg_uint16_t _dequant[64],const void *_enquant){ + const oc_iquant *enquant; + int nonzero; + int zzi; + int val; + int d; + int s; + enquant=(const oc_iquant *)_enquant; + nonzero=0; + for(zzi=0;zzi<64;zzi++){ + val=_dct[zzi]; + d=_dequant[zzi]; + val=val<<1; + if(abs(val)>=d){ + s=OC_SIGNMASK(val); + /*The bias added here rounds ties away from zero, since token + optimization can only decrease the magnitude of the quantized + value.*/ + val+=d+s^s; + /*Note the arithmetic right shift is not guaranteed by ANSI C. + Hopefully no one still uses ones-complement architectures.*/ + val=((enquant[zzi].m*(ogg_int32_t)val>>16)+val>>enquant[zzi].l)-s; + _qdct[zzi]=(ogg_int16_t)val; + nonzero=zzi; } + else _qdct[zzi]=0; } + return nonzero; } @@ -226,7 +302,7 @@ static const ogg_uint16_t OC_RPSD[2][64]={ relative to the total, scaled by 2**16, for each pixel format. These values were measured after motion-compensated prediction, before quantization, over a large set of test video encoded at all possible rates. - TODO: These values are only from INTER frames; it should be re-measured for + TODO: These values are only from INTER frames; they should be re-measured for INTRA frames.*/ static const ogg_uint16_t OC_PCD[4][3]={ {59926, 3038, 2572}, @@ -236,38 +312,58 @@ static const ogg_uint16_t OC_PCD[4][3]={ }; -/*Compute an "average" quantizer for each qi level. - We do one for INTER and one for INTRA, since their behavior is very - different, but average across chroma channels. +/*Compute "average" quantizers for each qi level to use for rate control. + We do one for each color channel, as well as an average across color + channels, separately for INTER and INTRA, since their behavior is very + different. The basic approach is to compute a harmonic average of the squared quantizer, weighted by the expected squared magnitude of the DCT coefficients. Under the (not quite true) assumption that DCT coefficients are Laplacian-distributed, this preserves the product Q*lambda, where lambda=sqrt(2/sigma**2) is the Laplacian distribution parameter (not to be confused with the lambda used in R-D optimization throughout most of the - rest of the code). - The value Q*lambda completely determines the entropy of the coefficients.*/ + rest of the code), when the distributions from multiple coefficients are + pooled. + The value Q*lambda completely determines the entropy of coefficients drawn + from a Laplacian distribution, and thus the expected bitrate.*/ void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64], + ogg_int16_t _log_plq[64][3][2],ogg_uint16_t _chroma_rd_scale[2][64][2], ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt){ int qi; int pli; int qti; int ci; for(qti=0;qti<2;qti++)for(qi=0;qi<64;qi++){ - ogg_int64_t q2; + ogg_int64_t q2; + ogg_uint32_t qp[3]; + ogg_uint32_t cqp; + ogg_uint32_t d; q2=0; for(pli=0;pli<3;pli++){ - ogg_uint32_t qp; - qp=0; + qp[pli]=0; for(ci=0;ci<64;ci++){ unsigned rq; unsigned qd; qd=_dequant[qi][pli][qti][OC_IZIG_ZAG[ci]]; rq=(OC_RPSD[qti][ci]+(qd>>1))/qd; - qp+=rq*(ogg_uint32_t)rq; + qp[pli]+=rq*(ogg_uint32_t)rq; } - q2+=OC_PCD[_pixel_fmt][pli]*(ogg_int64_t)qp; + q2+=OC_PCD[_pixel_fmt][pli]*(ogg_int64_t)qp[pli]; + /*plq=1.0/sqrt(qp)*/ + _log_plq[qi][pli][qti]= + (ogg_int16_t)(OC_Q10(32)-oc_blog32_q10(qp[pli])>>1); } + d=OC_PCD[_pixel_fmt][1]+OC_PCD[_pixel_fmt][2]; + cqp=(ogg_uint32_t)((OC_PCD[_pixel_fmt][1]*(ogg_int64_t)qp[1]+ + OC_PCD[_pixel_fmt][2]*(ogg_int64_t)qp[2]+(d>>1))/d); + /*chroma_rd_scale=clamp(0.25,cqp/qp[0],4)*/ + d=OC_MAXI(qp[0]+(1<<OC_RD_SCALE_BITS-1)>>OC_RD_SCALE_BITS,1); + d=OC_CLAMPI(1<<OC_RD_SCALE_BITS-2,(cqp+(d>>1))/d,4<<OC_RD_SCALE_BITS); + _chroma_rd_scale[qti][qi][0]=(ogg_int16_t)d; + /*chroma_rd_iscale=clamp(0.25,qp[0]/cqp,4)*/ + d=OC_MAXI(OC_RD_ISCALE(cqp,1),1); + d=OC_CLAMPI(1<<OC_RD_ISCALE_BITS-2,(qp[0]+(d>>1))/d,4<<OC_RD_ISCALE_BITS); + _chroma_rd_scale[qti][qi][1]=(ogg_int16_t)d; /*qavg=1.0/sqrt(q2).*/ _log_qavg[qti][qi]=OC_Q57(48)-oc_blog64(q2)>>1; } |