summaryrefslogtreecommitdiff
path: root/thirdparty/libtheora/encint.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/libtheora/encint.h')
-rw-r--r--thirdparty/libtheora/encint.h502
1 files changed, 427 insertions, 75 deletions
diff --git a/thirdparty/libtheora/encint.h b/thirdparty/libtheora/encint.h
index 97897d5a04..d25de4b8f6 100644
--- a/thirdparty/libtheora/encint.h
+++ b/thirdparty/libtheora/encint.h
@@ -11,17 +11,13 @@
********************************************************************
function:
- last mod: $Id: encint.h 16503 2009-08-22 18:14:02Z giles $
+ last mod: $Id$
********************************************************************/
#if !defined(_encint_H)
# define _encint_H (1)
-# if defined(HAVE_CONFIG_H)
-# include "config.h"
-# endif
# include "theora/theoraenc.h"
-# include "internal.h"
-# include "ocintrin.h"
+# include "state.h"
# include "mathops.h"
# include "enquant.h"
# include "huffenc.h"
@@ -32,8 +28,13 @@
typedef oc_mv oc_mv2[2];
typedef struct oc_enc_opt_vtable oc_enc_opt_vtable;
+typedef struct oc_enc_opt_data oc_enc_opt_data;
typedef struct oc_mb_enc_info oc_mb_enc_info;
typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser;
+typedef struct oc_fr_state oc_fr_state;
+typedef struct oc_qii_state oc_qii_state;
+typedef struct oc_enc_pipeline_state oc_enc_pipeline_state;
+typedef struct oc_mode_rd oc_mode_rd;
typedef struct oc_iir_filter oc_iir_filter;
typedef struct oc_frame_metrics oc_frame_metrics;
typedef struct oc_rc_state oc_rc_state;
@@ -42,6 +43,170 @@ typedef struct oc_token_checkpoint oc_token_checkpoint;
+/*Encoder-specific accelerated functions.*/
+# if defined(OC_X86_ASM)
+# if defined(_MSC_VER)
+# include "x86_vc/x86enc.h"
+# else
+# include "x86/x86enc.h"
+# endif
+# endif
+# if defined(OC_ARM_ASM)
+# include "arm/armenc.h"
+# endif
+
+# if !defined(oc_enc_accel_init)
+# define oc_enc_accel_init oc_enc_accel_init_c
+# endif
+# if defined(OC_ENC_USE_VTABLE)
+# if !defined(oc_enc_frag_sub)
+# define oc_enc_frag_sub(_enc,_diff,_src,_ref,_ystride) \
+ ((*(_enc)->opt_vtable.frag_sub)(_diff,_src,_ref,_ystride))
+# endif
+# if !defined(oc_enc_frag_sub_128)
+# define oc_enc_frag_sub_128(_enc,_diff,_src,_ystride) \
+ ((*(_enc)->opt_vtable.frag_sub_128)(_diff,_src,_ystride))
+# endif
+# if !defined(oc_enc_frag_sad)
+# define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
+ ((*(_enc)->opt_vtable.frag_sad)(_src,_ref,_ystride))
+# endif
+# if !defined(oc_enc_frag_sad_thresh)
+# define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
+ ((*(_enc)->opt_vtable.frag_sad_thresh)(_src,_ref,_ystride,_thresh))
+# endif
+# if !defined(oc_enc_frag_sad2_thresh)
+# define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
+ ((*(_enc)->opt_vtable.frag_sad2_thresh)(_src,_ref1,_ref2,_ystride,_thresh))
+# endif
+# if !defined(oc_enc_frag_intra_sad)
+# define oc_enc_frag_intra_sad(_enc,_src,_ystride) \
+ ((*(_enc)->opt_vtable.frag_intra_sad)(_src,_ystride))
+# endif
+# if !defined(oc_enc_frag_satd)
+# define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
+ ((*(_enc)->opt_vtable.frag_satd)(_dc,_src,_ref,_ystride))
+# endif
+# if !defined(oc_enc_frag_satd2)
+# define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
+ ((*(_enc)->opt_vtable.frag_satd2)(_dc,_src,_ref1,_ref2,_ystride))
+# endif
+# if !defined(oc_enc_frag_intra_satd)
+# define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
+ ((*(_enc)->opt_vtable.frag_intra_satd)(_dc,_src,_ystride))
+# endif
+# if !defined(oc_enc_frag_ssd)
+# define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
+ ((*(_enc)->opt_vtable.frag_ssd)(_src,_ref,_ystride))
+# endif
+# if !defined(oc_enc_frag_border_ssd)
+# define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
+ ((*(_enc)->opt_vtable.frag_border_ssd)(_src,_ref,_ystride,_mask))
+# endif
+# if !defined(oc_enc_frag_copy2)
+# define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
+ ((*(_enc)->opt_vtable.frag_copy2)(_dst,_src1,_src2,_ystride))
+# endif
+# if !defined(oc_enc_enquant_table_init)
+# define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
+ ((*(_enc)->opt_vtable.enquant_table_init)(_enquant,_dequant))
+# endif
+# if !defined(oc_enc_enquant_table_fixup)
+# define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
+ ((*(_enc)->opt_vtable.enquant_table_fixup)(_enquant,_nqis))
+# endif
+# if !defined(oc_enc_quantize)
+# define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
+ ((*(_enc)->opt_vtable.quantize)(_qdct,_dct,_dequant,_enquant))
+# endif
+# if !defined(oc_enc_frag_recon_intra)
+# define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
+ ((*(_enc)->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue))
+# endif
+# if !defined(oc_enc_frag_recon_inter)
+# define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
+ ((*(_enc)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
+# endif
+# if !defined(oc_enc_fdct8x8)
+# define oc_enc_fdct8x8(_enc,_y,_x) \
+ ((*(_enc)->opt_vtable.fdct8x8)(_y,_x))
+# endif
+# else
+# if !defined(oc_enc_frag_sub)
+# define oc_enc_frag_sub(_enc,_diff,_src,_ref,_ystride) \
+ oc_enc_frag_sub_c(_diff,_src,_ref,_ystride)
+# endif
+# if !defined(oc_enc_frag_sub_128)
+# define oc_enc_frag_sub_128(_enc,_diff,_src,_ystride) \
+ oc_enc_frag_sub_128_c(_diff,_src,_ystride)
+# endif
+# if !defined(oc_enc_frag_sad)
+# define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
+ oc_enc_frag_sad_c(_src,_ref,_ystride)
+# endif
+# if !defined(oc_enc_frag_sad_thresh)
+# define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
+ oc_enc_frag_sad_thresh_c(_src,_ref,_ystride,_thresh)
+# endif
+# if !defined(oc_enc_frag_sad2_thresh)
+# define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
+ oc_enc_frag_sad2_thresh_c(_src,_ref1,_ref2,_ystride,_thresh)
+# endif
+# if !defined(oc_enc_frag_intra_sad)
+# define oc_enc_frag_intra_sad(_enc,_src,_ystride) \
+ oc_enc_frag_intra_sad_c(_src,_ystride)
+# endif
+# if !defined(oc_enc_frag_satd)
+# define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
+ oc_enc_frag_satd_c(_dc,_src,_ref,_ystride)
+# endif
+# if !defined(oc_enc_frag_satd2)
+# define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
+ oc_enc_frag_satd2_c(_dc,_src,_ref1,_ref2,_ystride)
+# endif
+# if !defined(oc_enc_frag_intra_satd)
+# define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
+ oc_enc_frag_intra_satd_c(_dc,_src,_ystride)
+# endif
+# if !defined(oc_enc_frag_ssd)
+# define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
+ oc_enc_frag_ssd_c(_src,_ref,_ystride)
+# endif
+# if !defined(oc_enc_frag_border_ssd)
+# define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
+ oc_enc_frag_border_ssd_c(_src,_ref,_ystride,_mask)
+# endif
+# if !defined(oc_enc_frag_copy2)
+# define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
+ oc_enc_frag_copy2_c(_dst,_src1,_src2,_ystride)
+# endif
+# if !defined(oc_enc_enquant_table_init)
+# define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
+ oc_enc_enquant_table_init_c(_enquant,_dequant)
+# endif
+# if !defined(oc_enc_enquant_table_fixup)
+# define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
+ oc_enc_enquant_table_fixup_c(_enquant,_nqis)
+# endif
+# if !defined(oc_enc_quantize)
+# define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
+ oc_enc_quantize_c(_qdct,_dct,_dequant,_enquant)
+# endif
+# if !defined(oc_enc_frag_recon_intra)
+# define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
+ oc_frag_recon_intra_c(_dst,_ystride,_residue)
+# endif
+# if !defined(oc_enc_frag_recon_inter)
+# define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
+ oc_frag_recon_inter_c(_dst,_src,_ystride,_residue)
+# endif
+# if !defined(oc_enc_fdct8x8)
+# define oc_enc_fdct8x8(_enc,_y,_x) oc_enc_fdct8x8_c(_y,_x)
+# endif
+# endif
+
+
+
/*Constants for the packet-out state machine specific to the encoder.*/
/*Next packet to emit: Data packet, but none are ready yet.*/
@@ -50,13 +215,61 @@ typedef struct oc_token_checkpoint oc_token_checkpoint;
#define OC_PACKET_READY (1)
/*All features enabled.*/
-#define OC_SP_LEVEL_SLOW (0)
+#define OC_SP_LEVEL_SLOW (0)
/*Enable early skip.*/
-#define OC_SP_LEVEL_EARLY_SKIP (1)
+#define OC_SP_LEVEL_EARLY_SKIP (1)
+/*Use analysis shortcuts, single quantizer, and faster tokenization.*/
+#define OC_SP_LEVEL_FAST_ANALYSIS (2)
+/*Use SAD instead of SATD.*/
+#define OC_SP_LEVEL_NOSATD (3)
/*Disable motion compensation.*/
-#define OC_SP_LEVEL_NOMC (2)
+#define OC_SP_LEVEL_NOMC (4)
/*Maximum valid speed level.*/
-#define OC_SP_LEVEL_MAX (2)
+#define OC_SP_LEVEL_MAX (4)
+
+
+/*The number of extra bits of precision at which to store rate metrics.*/
+# define OC_BIT_SCALE (6)
+/*The number of extra bits of precision at which to store RMSE metrics.
+ This must be at least half OC_BIT_SCALE (rounded up).*/
+# define OC_RMSE_SCALE (5)
+/*The number of quantizer bins to partition statistics into.*/
+# define OC_LOGQ_BINS (8)
+/*The number of SAD/SATD bins to partition statistics into.*/
+# define OC_COMP_BINS (24)
+/*The number of bits of precision to drop from SAD and SATD scores
+ to assign them to a bin.*/
+# define OC_SAD_SHIFT (6)
+# define OC_SATD_SHIFT (9)
+
+/*Masking is applied by scaling the D used in R-D optimization (via rd_scale)
+ or the lambda parameter (via rd_iscale).
+ These are only equivalent within a single block; when more than one block is
+ being considered, the former is the interpretation used.*/
+
+/*This must be at least 4 for OC_RD_SKIP_SCALE() to work below.*/
+# define OC_RD_SCALE_BITS (12-OC_BIT_SCALE)
+# define OC_RD_ISCALE_BITS (11)
+
+/*This macro is applied to _ssd values with just 4 bits of headroom
+ ((15-OC_RMSE_SCALE)*2+OC_BIT_SCALE+2); since we want to allow rd_scales as
+ large as 16, and need additional fractional bits, our only recourse that
+ doesn't lose precision on blocks with very small SSDs is to use a wider
+ multiply.*/
+# if LONG_MAX>2147483647
+# define OC_RD_SCALE(_ssd,_rd_scale) \
+ ((unsigned)((unsigned long)(_ssd)*(_rd_scale) \
+ +((1<<OC_RD_SCALE_BITS)>>1)>>OC_RD_SCALE_BITS))
+# else
+# define OC_RD_SCALE(_ssd,_rd_scale) \
+ (((_ssd)>>OC_RD_SCALE_BITS)*(_rd_scale) \
+ +(((_ssd)&(1<<OC_RD_SCALE_BITS)-1)*(_rd_scale) \
+ +((1<<OC_RD_SCALE_BITS)>>1)>>OC_RD_SCALE_BITS))
+# endif
+# define OC_RD_SKIP_SCALE(_ssd,_rd_scale) \
+ ((_ssd)*(_rd_scale)+((1<<OC_RD_SCALE_BITS-4)>>1)>>OC_RD_SCALE_BITS-4)
+# define OC_RD_ISCALE(_lambda,_rd_iscale) \
+ ((_lambda)*(_rd_iscale)+((1<<OC_RD_ISCALE_BITS)>>1)>>OC_RD_ISCALE_BITS)
/*The bits used for each of the MB mode codebooks.*/
@@ -78,6 +291,10 @@ extern const unsigned char OC_BLOCK_RUN_CODE_NBITS[30];
/*Encoder specific functions with accelerated variants.*/
struct oc_enc_opt_vtable{
+ void (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+ void (*frag_sub_128)(ogg_int16_t _diff[64],
+ const unsigned char *_src,int _ystride);
unsigned (*frag_sad)(const unsigned char *_src,
const unsigned char *_ref,int _ystride);
unsigned (*frag_sad_thresh)(const unsigned char *_src,
@@ -85,18 +302,23 @@ struct oc_enc_opt_vtable{
unsigned (*frag_sad2_thresh)(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh);
- unsigned (*frag_satd_thresh)(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh);
- unsigned (*frag_satd2_thresh)(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh);
- unsigned (*frag_intra_satd)(const unsigned char *_src,int _ystride);
- void (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src,
+ unsigned (*frag_intra_sad)(const unsigned char *_src,int _ystride);
+ unsigned (*frag_satd)(int *_dc,const unsigned char *_src,
const unsigned char *_ref,int _ystride);
- void (*frag_sub_128)(ogg_int16_t _diff[64],
- const unsigned char *_src,int _ystride);
+ unsigned (*frag_satd2)(int *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+ unsigned (*frag_intra_satd)(int *_dc,const unsigned char *_src,int _ystride);
+ unsigned (*frag_ssd)(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+ unsigned (*frag_border_ssd)(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,ogg_int64_t _mask);
void (*frag_copy2)(unsigned char *_dst,
const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+ void (*enquant_table_init)(void *_enquant,
+ const ogg_uint16_t _dequant[64]);
+ void (*enquant_table_fixup)(void *_enquant[3][3][2],int _nqis);
+ int (*quantize)(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64],
+ const ogg_uint16_t _dequant[64],const void *_enquant);
void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
const ogg_int16_t _residue[64]);
void (*frag_recon_inter)(unsigned char *_dst,
@@ -105,7 +327,19 @@ struct oc_enc_opt_vtable{
};
-void oc_enc_vtable_init(oc_enc_ctx *_enc);
+/*Encoder specific data that varies according to which variants of the above
+ functions are used.*/
+struct oc_enc_opt_data{
+ /*The size of a single quantizer table.
+ This must be a multiple of enquant_table_alignment.*/
+ size_t enquant_table_size;
+ /*The alignment required for the quantizer tables.
+ This must be a positive power of two.*/
+ int enquant_table_alignment;
+};
+
+
+void oc_enc_accel_init(oc_enc_ctx *_enc);
@@ -158,7 +392,7 @@ struct oc_mode_scheme_chooser{
corresponds to the ranks above.*/
unsigned char scheme0_list[OC_NMODES];
/*The number of times each mode has been chosen so far.*/
- int mode_counts[OC_NMODES];
+ unsigned mode_counts[OC_NMODES];
/*The list of mode coding schemes, sorted in ascending order of bit cost.*/
unsigned char scheme_list[8];
/*The number of bits used by each mode coding scheme.*/
@@ -170,6 +404,106 @@ void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser);
+/*State to track coded block flags and their bit cost.
+ We use opportunity cost to measure the bits required to code or skip the next
+ block, using the cheaper of the cost to code it fully or partially, so long
+ as both are possible.*/
+struct oc_fr_state{
+ /*The number of bits required for the coded block flags so far this frame.*/
+ ptrdiff_t bits;
+ /*The length of the current run for the partial super block flag, not
+ including the current super block.*/
+ unsigned sb_partial_count:16;
+ /*The length of the current run for the full super block flag, not
+ including the current super block.*/
+ unsigned sb_full_count:16;
+ /*The length of the coded block flag run when the current super block
+ started.*/
+ unsigned b_coded_count_prev:6;
+ /*The coded block flag when the current super block started.*/
+ signed int b_coded_prev:2;
+ /*The length of the current coded block flag run.*/
+ unsigned b_coded_count:6;
+ /*The current coded block flag.*/
+ signed int b_coded:2;
+ /*The number of blocks processed in the current super block.*/
+ unsigned b_count:5;
+ /*Whether or not it is cheaper to code the current super block partially,
+ even if it could still be coded fully.*/
+ unsigned sb_prefer_partial:1;
+ /*Whether the last super block was coded partially.*/
+ signed int sb_partial:2;
+ /*The number of bits required for the flags for the current super block.*/
+ unsigned sb_bits:6;
+ /*Whether the last non-partial super block was coded fully.*/
+ signed int sb_full:2;
+};
+
+
+/*NOTE(review): this struct carries no documentation in this header.
+ It holds a running bit total (bits) and two run-length/flag pairs packed into
+  bit-fields: a 14-bit unsigned count and a 2-bit signed flag each for qi01
+  and qi12.
+ The layout parallels oc_fr_state, so it presumably tracks the bit cost of
+  run-length coding the per-block quantizer index (qii) flags, with the signed
+  flags leaving room for a negative "no run yet" value; confirm against the
+  code that initializes and advances this state.*/
+struct oc_qii_state{
+ ptrdiff_t bits;
+ unsigned qi01_count:14;
+ signed int qi01:2;
+ unsigned qi12_count:14;
+ signed int qi12:2;
+};
+
+
+
+/*Temporary encoder state for the analysis pipeline.*/
+struct oc_enc_pipeline_state{
+ /*DCT coefficient storage.
+ This is kept off the stack because a) gcc can't align things on the stack
+ reliably on ARM, and b) it avoids (unintentional) data hazards between
+ ARM and NEON code.*/
+ OC_ALIGN16(ogg_int16_t dct_data[64*3]);
+ OC_ALIGN16(signed char bounding_values[256]);
+ oc_fr_state fr[3];
+ oc_qii_state qs[3];
+ /*Skip SSD storage for the current MCU in each plane.*/
+ unsigned *skip_ssd[3];
+ /*Coded/uncoded fragment lists for each plane for the current MCU.*/
+ ptrdiff_t *coded_fragis[3];
+ ptrdiff_t *uncoded_fragis[3];
+ ptrdiff_t ncoded_fragis[3];
+ ptrdiff_t nuncoded_fragis[3];
+ /*The starting fragment for the current MCU in each plane.*/
+ ptrdiff_t froffset[3];
+ /*The starting row for the current MCU in each plane.*/
+ int fragy0[3];
+ /*The ending row for the current MCU in each plane.*/
+ int fragy_end[3];
+ /*The starting superblock for the current MCU in each plane.*/
+ unsigned sbi0[3];
+ /*The ending superblock for the current MCU in each plane.*/
+ unsigned sbi_end[3];
+ /*The number of tokens for zzi=1 for each color plane.*/
+ int ndct_tokens1[3];
+ /*The outstanding eob_run count for zzi=1 for each color plane.*/
+ int eob_run1[3];
+ /*Whether or not the loop filter is enabled.*/
+ int loop_filter;
+};
+
+
+
+/*Statistics used to estimate R-D cost of a block in a given coding mode.
+ See modedec.h for more details.*/
+struct oc_mode_rd{
+ /*The expected bits used by the DCT tokens, shifted by OC_BIT_SCALE.*/
+ ogg_int16_t rate;
+ /*The expected square root of the sum of squared errors, shifted by
+ OC_RMSE_SCALE.*/
+ ogg_int16_t rmse;
+};
+
+# if defined(OC_COLLECT_METRICS)
+# include "collect.h"
+# endif
+
+
+
/*A 2nd order low-pass Bessel follower.
We use this for rate control because it has fast reaction time, but is
critically damped.*/
@@ -190,6 +524,8 @@ struct oc_frame_metrics{
unsigned dup_count:31;
/*The frame type from pass 1.*/
unsigned frame_type:1;
+ /*The frame activity average from pass 1.*/
+ unsigned activity_avg;
};
@@ -335,10 +671,15 @@ struct th_enc_ctx{
size_t mv_bits[2];
/*The mode scheme chooser for estimating mode coding costs.*/
oc_mode_scheme_chooser chooser;
+ /*Temporary encoder state for the analysis pipeline.*/
+ oc_enc_pipeline_state pipe;
/*The number of vertical super blocks in an MCU.*/
int mcu_nvsbs;
/*The SSD error for skipping each fragment in the current MCU.*/
unsigned *mcu_skip_ssd;
+ /*The masking scale factors for chroma blocks in the current MCU.*/
+ ogg_uint16_t *mcu_rd_scale;
+ ogg_uint16_t *mcu_rd_iscale;
/*The DCT token lists for each coefficient and each plane.*/
unsigned char **dct_tokens[3];
/*The extra bits associated with each DCT token.*/
@@ -350,8 +691,10 @@ struct th_enc_ctx{
/*The offset of the first DCT token for each coefficient for each plane.*/
unsigned char dct_token_offs[3][64];
/*The last DC coefficient for each plane and reference frame.*/
- int dc_pred_last[3][3];
+ int dc_pred_last[3][4];
#if defined(OC_COLLECT_METRICS)
+ /*Fragment SAD statistics for MB mode estimation metrics.*/
+ unsigned *frag_sad;
/*Fragment SATD statistics for MB mode estimation metrics.*/
unsigned *frag_satd;
/*Fragment SSD statistics for MB mode estimation metrics.*/
@@ -359,32 +702,56 @@ struct th_enc_ctx{
#endif
/*The R-D optimization parameter.*/
int lambda;
+ /*The average block "activity" of the previous frame.*/
+ unsigned activity_avg;
+ /*The average MB luma of the previous frame.*/
+ unsigned luma_avg;
/*The huffman tables in use.*/
th_huff_code huff_codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
/*The quantization parameters in use.*/
th_quant_info qinfo;
- oc_iquant *enquant_tables[64][3][2];
- oc_iquant_table enquant_table_data[64][3][2];
- /*An "average" quantizer for each quantizer type (INTRA or INTER) and qi
- value.
- This is used to paramterize the rate control decisions.
+ /*The original DC coefficients saved off from the dequantization tables.*/
+ ogg_uint16_t dequant_dc[64][3][2];
+ /*Condensed dequantization tables.*/
+ const ogg_uint16_t *dequant[3][3][2];
+ /*Condensed quantization tables.*/
+ void *enquant[3][3][2];
+ /*The full set of quantization tables.*/
+ void *enquant_tables[64][3][2];
+ /*Storage for the quantization tables.*/
+ unsigned char *enquant_table_data;
+ /*An "average" quantizer for each frame type (INTRA or INTER) and qi value.
+ This is used to parameterize the rate control decisions.
They are kept in the log domain to simplify later processing.
- Keep in mind these are DCT domain quantizers, and so are scaled by an
- additional factor of 4 from the pixel domain.*/
+ These are DCT domain quantizers, and so are scaled by an additional factor
+ of 4 from the pixel domain.*/
ogg_int64_t log_qavg[2][64];
+ /*The "average" quantizer further partitioned by color plane.
+ This is used to parameterize mode decision.
+ These are DCT domain quantizers, and so are scaled by an additional factor
+ of 4 from the pixel domain.*/
+ ogg_int16_t log_plq[64][3][2];
+ /*The R-D scale factors to apply to chroma blocks for a given frame type
+ (INTRA or INTER) and qi value.
+ The first is the "D" modifier (rd_scale), while the second is the "lambda"
+ modifier (rd_iscale).*/
+ ogg_uint16_t chroma_rd_scale[2][64][2];
+ /*The interpolated mode decision R-D lookup tables for the current
+ quantizers, color plane, and quantization type.*/
+ oc_mode_rd mode_rd[3][3][2][OC_COMP_BINS];
/*The buffer state used to drive rate control.*/
oc_rc_state rc;
+# if defined(OC_ENC_USE_VTABLE)
/*Table for encoder acceleration functions.*/
oc_enc_opt_vtable opt_vtable;
+# endif
+ /*Table for encoder data used by accelerated functions.*/
+ oc_enc_opt_data opt_data;
};
void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode);
int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode);
-#if defined(OC_COLLECT_METRICS)
-void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc);
-void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc);
-#endif
@@ -415,8 +782,13 @@ struct oc_token_checkpoint{
void oc_enc_tokenize_start(oc_enc_ctx *_enc);
int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
- ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
- int _zzi,oc_token_checkpoint **_stack,int _acmin);
+ ogg_int16_t *_qdct_out,const ogg_int16_t *_qdct_in,
+ const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _lambda,int _acmin);
+int oc_enc_tokenize_ac_fast(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
+ ogg_int16_t *_qdct_out,const ogg_int16_t *_qdct_in,
+ const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _lambda,int _acmin);
void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc,
const oc_token_checkpoint *_stack,int _n);
void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc,
@@ -436,45 +808,13 @@ int oc_state_flushheader(oc_theora_state *_state,int *_packet_state,
-/*Encoder-specific accelerated functions.*/
-void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
- const unsigned char *_src,const unsigned char *_ref,int _ystride);
-void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
- const unsigned char *_src,int _ystride);
-unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_src,
- const unsigned char *_ref,int _ystride);
-unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc,
- const unsigned char *_src,const unsigned char *_ref,int _ystride,
- unsigned _thresh);
-unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc,
- const unsigned char *_src,const unsigned char *_ref1,
- const unsigned char *_ref2,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
- const unsigned char *_src,const unsigned char *_ref,int _ystride,
- unsigned _thresh);
-unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
- const unsigned char *_src,const unsigned char *_ref1,
- const unsigned char *_ref2,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
- const unsigned char *_src,int _ystride);
-void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
- const unsigned char *_src1,const unsigned char *_src2,int _ystride);
-void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc,
- unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]);
-void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst,
- const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
-void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64],
- const ogg_int16_t _x[64]);
-
-/*Default pure-C implementations.*/
-void oc_enc_vtable_init_c(oc_enc_ctx *_enc);
+/*Default pure-C implementations of encoder-specific accelerated functions.*/
+void oc_enc_accel_init_c(oc_enc_ctx *_enc);
void oc_enc_frag_sub_c(ogg_int16_t _diff[64],
const unsigned char *_src,const unsigned char *_ref,int _ystride);
void oc_enc_frag_sub_128_c(ogg_int16_t _diff[64],
const unsigned char *_src,int _ystride);
-void oc_enc_frag_copy2_c(unsigned char *_dst,
- const unsigned char *_src1,const unsigned char *_src2,int _ystride);
unsigned oc_enc_frag_sad_c(const unsigned char *_src,
const unsigned char *_ref,int _ystride);
unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
@@ -482,12 +822,24 @@ unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh);
-unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh);
-unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_intra_sad_c(const unsigned char *_src, int _ystride);
+unsigned oc_enc_frag_satd_c(int *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_satd2_c(int *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd_c(int *_dc,
+ const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_ssd_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_border_ssd_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,ogg_int64_t _mask);
+void oc_enc_frag_copy2_c(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+void oc_enc_enquant_table_init_c(void *_enquant,
+ const ogg_uint16_t _dequant[64]);
+void oc_enc_enquant_table_fixup_c(void *_enquant[3][3][2],int _nqis);
+int oc_enc_quantize_c(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64],
+ const ogg_uint16_t _dequant[64],const void *_enquant);
void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
#endif